/* Assign the _distiml libref to the internal/iml_modules directory under the directory of this included file. */
/* The nci_multivar_distrib macro below loads its IML modules from the _distiml.distrib_modules storage catalog. */
libname _distiml "&sysincludefiledir./internal/iml_modules";

/*
Simulate Usual Intakes

Description:

	Simulates usual intakes for subjects that represent the
	distribution of true usual intakes.
	
Parameters:

	- multivar_mcmc_lib: The SAS library that contains the output datasets of an MCMC model. (default = WORK)
	- multivar_mcmc_model: Prefix for the output datasets of the MCMC model (max 23 characters).
												 See nci_multivar_mcmc macro for more details.
	- distrib_population: A SAS data set.
												Must contain all covariates and never-consumer covariates used in the MCMC model.
	- id: Variable that identifies each subject in distrib_population.
	- weight: Variable with weighting for each subject in distrib_population.
	- nuisance_weight: Variable with the weighting for each nuisance variable level in distrib_population.
	- use_mcmc_u_matrices: Flag specifying whether previously saved post-MCMC random effects (u) matrix draws should be used instead of simulating new ones.
												 Used in regression calibration procedures, see Regression Calibration section. (Y/N, default = N)
	- distrib_seed: Positive integer starting seed for the random number generator.
									If blank, uses a randomly generated integer from 1 to 2*10^7, inclusive. (default = blank)
	- num_simulated_u: Integer specifying the number of simulated random effect (u) draws to make for each subject.
										 Has no effect if use_mcmc_u_matrices is Y. (default = 500)
	- dietary_supplements: A SAS data set of dietary supplements.
												 Must contain the following columns:
												 	 - variable: The name of the variable.
												 	 - supplement: The name of the dietary supplement to apply to the variable.
	- additional_output: Space-delimited list of names of additional variables in distrib_population to be included in the output dataset.
	- outlib: The SAS library to store output datasets. (default = WORK)
	- outname: The name of the output dataset.
	
Output:

	The following SAS data sets are created in outlib and prefixed with outname:
	
		- outname: A SAS data set where each row represents a simulated subject.
							 Contains the following columns:
							 	 - &id.: Identifier for each subject.
								 - &weight.: Weighting for each subject.
								 - Variables specified in additional_output.
								 - usl_ variables: The usual intake for each variable.
								 - sup_ variables: The usual intake plus the supplement amount for variables with dietary supplements.
								 - prb_ variables: The long-term probability of consumption for episodic variables.
								 - amt_ variables: The long-term amount consumed given consumption for episodic variables.
								 - _seed: The random number generator seed used to generate the results, see the distrib_seed parameter for details.
	
Details:

	Usual intakes are simulated for each subject in the
	distrib_population dataset for different values of the random effects
	(u). The num_simulated_u parameter determines how many values of u
	are simulated per subject. The simulated u values are drawn from a
	multivariate normal distribution with a mean of zero and
	variance/covariance matrix equal to the posterior mean of Sigma-u in
	the MCMC model.

	Optionally, saved values of u drawn after the MCMC chain conditional on
	the posterior means of the parameters can be used instead of simulating new
	values of u. This must be done when using regression calibration for
	measurement error correction (see Regression Calibration section).

	Simulated usual intakes are output for each variable in the MCMC model. The
	usual intake for daily variables is simply the long-term average amount
	consumed. Usual intakes for episodic variables are the product of the
	long-term probability of consumption and the long-term amount consumed
	given consumption. For episodic variables, the usual intake and the
	separate probability and amount components are included in the output.
	
Regression Calibration:

	The simulated usual intakes from
	nci_multivar_distrib can be used in regression calibration for
	measurement error correction. To do this, saved random effect (u)
	matrices conditional on the posterior means of the MCMC parameters must be
	used in place of simulated values of u. The values of id must match
	with the _subjects dataset in multivar_mcmc_model to apply the saved post-MCMC
	u values. New values of u are simulated for unmatched observations.
	
Nuisance Covariates:

	Nuisance covariates are not of interest in the
	final model, but they must be accounted for when simulating long-term
	average intakes. For example, a dietary study with two recalls may want to
	account for whether a recall is the second one for a subject and whether the
	recall was on a weekday or weekend. Nuisance covariates can be factored out
	by creating a row in distrib_population for each level of the covariate
	for every subject and creating a weighting variable with the weight for
	each level. An example of this procedure can be found in the daily nutrient
	analysis vignette.
	
Dietary Supplements:

	Dietary supplements can be added to the usual
	intakes for one or more variables. This macro uses the shrink-then-add
	method, which means that the usual intake is calculated first and then the
	supplement is added. If a variable has a dietary supplement associated
	with it, a base usual intake and a supplemented intake are included in the
	output.
*/
%macro nci_multivar_distrib(
	multivar_mcmc_lib=WORK,
	multivar_mcmc_model=,
	distrib_population=,
	id=,
	weight=,
	nuisance_weight=,
	use_mcmc_u_matrices=N,
	distrib_seed=,
	num_simulated_u=500,
	dietary_supplements=,
	additional_output=,
	outlib=WORK,
	outname=
);

	/*
	Sort the population by the subject identifier into the _distrib_pop
	data set. The IML steps below read that data set by name.
	*/
	proc sort
		data=&distrib_population.
		out=_distrib_pop;
		by &id.;
	run;

	proc iml;

		/* Bring every module stored in the distrib catalog into this session */
		reset storage=_distiml.distrib_modules;
		load module=_all_;

		/* Step 1: read the MCMC model output data sets into a list of parameters */
		mcmc_parameters = load_parameters("&multivar_mcmc_lib..&multivar_mcmc_model.");

		/*
		Step 2: build the parameters and data matrices needed by the main loop.
		The distrib_seed argument is passed exactly as supplied to the macro,
		so a blank macro value leaves that positional argument empty.
		*/
		distrib_parameters = initialize_distrib(
			"_distrib_pop",
			"&id.",
			"&nuisance_weight.",
			mcmc_parameters$"covariates",
			mcmc_parameters$"intercepts",
			mcmc_parameters$"beta",
			mcmc_parameters$"sigma_u",
			mcmc_parameters$"sigma_e",
			"&use_mcmc_u_matrices.",
			"&dietary_supplements.",
			&num_simulated_u.,
			mcmc_parameters$"num_episodic",
			mcmc_parameters$"num_daily",
			mcmc_parameters$"num_mcmc_iterations",
			mcmc_parameters$"num_burn",
			mcmc_parameters$"num_thin",
			mcmc_parameters$"num_post",
			&distrib_seed.,
			mcmc_parameters$"has_never_consumers",
			mcmc_parameters$"never_consumer_covariates",
			mcmc_parameters$"never_consumer_intercept",
			mcmc_parameters$"alpha1"
		);

		/* Step 3: run the main loop, which is given the output data set name outlib.outname */
		call distrib_main_loop(
			distrib_parameters$"xbeta",
			distrib_parameters$"u_standard_deviation",
			distrib_parameters$"sigma_e_mean",
			mcmc_parameters$"u_matrices_post",
			"&use_mcmc_u_matrices.",
			"&multivar_mcmc_lib..&multivar_mcmc_model._backtran",
			distrib_parameters$"records",
			distrib_parameters$"subjects",
			mcmc_parameters$"mcmc_subjects",
			mcmc_parameters$"episodic_variables",
			mcmc_parameters$"daily_variables",
			distrib_parameters$"num_records",
			distrib_parameters$"num_subjects",
			mcmc_parameters$"num_episodic",
			mcmc_parameters$"num_daily",
			distrib_parameters$"num_replicates",
			distrib_parameters$"nuisance_weighting",
			distrib_parameters$"variables_to_supplement",
			distrib_parameters$"dietary_supplement_data",
			"_distrib_pop",
			"&id.",
			"&weight.",
			"&additional_output.",
			mcmc_parameters$"has_never_consumers",
			distrib_parameters$"consumer_probabilities",
			"&outlib..&outname."
		);

		/* Copy the seed held in distrib_parameters back into the distrib_seed macro variable */
		call symputx("distrib_seed", distrib_parameters$"distrib_seed");

	quit;

	/* Record the seed on every row of the output data set as _seed */
	data &outlib..&outname.;
		set &outlib..&outname.;

		_seed = &distrib_seed.;
	run;

%mend nci_multivar_distrib;