/*
Program Purpose:
IML modules for functions called during distrib main loop;
*/

libname _modules ".";

proc iml;

	**matches indices of column vector B that correspond to values in column vector A;
	start match(a, b);
	
		**convert vectors A and B to characters to avoid type mismatches;
		if type(a) = "N" then do;
		
			a_char = char(a);
		end;
		else do;
		
			a_char = a;
		end;
		
		if type(b) = "N" then do;
		
			b_char = char(b);
		end;
		else do;
		
			b_char = b;
		end;
		
		**equalize the lengths of A and B for proper merging;
		length_a = nleng(a_char);
		length_b = nleng(b_char);
		
		max_length = length_a <> length_b;
		fmt = cat("$", strip(char(max_length)), ".");
		
		a_char = putc(a_char, fmt);
		b_char = putc(b_char, fmt);
		
		**place vectors A and B into datasets;
		create _temp_a from a_char[colname={"value"}];
			append from a_char;
		close _temp_a;
		
		create _temp_b from b_char[colname={"value"}];
			append from b_char;
		close _temp_b;
		
		**submit statement to access merge in base SAS;
		submit;
		
			**ordering variables for A and B;
			data _temp_a;
				set _temp_a;
				
				order_a = _N_;
			run;
			
			data _temp_b;
				set _temp_b;
				
				order_b = _N_;
			run;
			
			**match values using merge;
			proc sort data=_temp_a; by value; run;
			proc sort data=_temp_b; by value; run;
			data _temp_a;
				merge _temp_a (in=in_a)
							_temp_b;
				by value;
				if in_a = 1;
				
				match_index = order_b;
			run;
			
			**sort back to original order;
			proc sort data=_temp_a; by order_a; run;
		endsubmit;
		
		**extract match index vector;
		use _temp_a;
			read all var {match_index} into match_index;
		close _temp_a;
		
		**clean up temporary datasets;
		call delete(_temp_a);
		call delete(_temp_b);
		
		return match_index;
	finish;
	
	**Simulates random intercepts ('u' matrix) for each subject;
	start generate_u_matrix(use_mcmc_u_matrices,
													mcmc_u_matrix,
													u_standard_deviation,
													mcmc_u_index,
													mcmc_distrib_match,
													sim_u_index,
													num_subjects,
													num_episodic,
													num_daily);
													
		**total number of episodic and daily variables;
		num_variables = 2*num_episodic + num_daily;
		
		if upcase(use_mcmc_u_matrices) = "Y" then do;
		
			u_matrix = j(num_subjects, num_variables, .);
		
			**if using MCMC U matrices, use the U matrix for the current replicate as a base;
			if ^IsEmpty(mcmc_u_index) then do;
			
				u_matrix[mcmc_u_index,] = mcmc_u_matrix[mcmc_distrib_match,];
			end;
			
			**simulate remaining observations if needed;
			if ^IsEmpty(sim_u_index) then do;
			
				num_sim = ncol(sim_u_index);
				normals = j(num_sim, num_variables, .);
				call randgen(normals, "Normal");
				u_matrix[sim_u_index,] = normals * u_standard_deviation;
			end;
		end;
		else do;
		
			normals = j(num_subjects, num_variables, .);
			call randgen(normals, "Normal");
			u_matrix = normals * u_standard_deviation;
		end;
		
		return u_matrix;
	finish;
	
	**For models with never-consumers allowed for the first episodic variable, identifies which subjects have any consumption;
	start find_never_consumers(has_never_consumers,
													 	 consumer_probabilities,
													 	 subject_record_match,
													 	 num_subjects);
		
		if has_never_consumers = 1 then do;
		
			never_consumer_selection = j(num_subjects, 1, .);
			call randgen(never_consumer_selection, "Uniform");
			never_consumers = (never_consumer_selection[subject_record_match] > consumer_probabilities);
		end;
		else do;
		
			never_consumers = {};
		end;
		
		return never_consumers;
	finish;
	
	**Calculates the probability of consumption for episodic variables;
	start calculate_probability(consumption_probability,
															probability_names,
															xbeta_u,
															episodic_variables,
															num_records,
															num_episodic,
															has_never_consumers,
															never_consumers);
		
		if num_episodic > 0 then do;
		
			consumption_probability = j(num_records, num_episodic, .);
		
			**Calculate consumption probability of episodic variables;
			do i = 1 to num_episodic;
		
				consumption_probability[,i] = cdf("Normal", xbeta_u[,2*i - 1]);
			end;
		
			**if never-consumers are allowed, set consumption probabilities for never-consumers to zero for the first episodic food;
			if has_never_consumers = 1 then do;
		
				if any(never_consumers) = 1 then do;
			
					consumption_probability[loc(never_consumers),1] = 0;
				end;
			end;
		
			probability_names = cat("prb_", episodic_variables);
		end;
		else do;
		
			consumption_probability = {};
			probability_names = {};
		end;
	finish;
	
	**Calculates the consumption amount on the original scale for episodic and daily variables;
	start calculate_amount(backtransformed_amount,
												 amount_names,
												 xbeta_u,
												 sigma_e_mean,
												 backtransformation_data,
												 episodic_variables,
												 daily_variables,
												 num_records,
												 num_episodic,
												 num_daily,
												 has_never_consumers,
												 never_consumers);
												 
		variables = episodic_variables || daily_variables;
		
		**backtransformation parameters;
		use (backtransformation_data);
			read all var {variable} into tran_variable;
			read all var {tran_lambda} into tran_lambda;
			read all var {minamount} into minamount;
			read all var {tran_center} into tran_center;
			read all var {tran_scale} into tran_scale;
			read all var {biomarker} into biomarker;
		close (backtransformation_data);
		
		**calculate consumption amount on the original scale for all variables;
		backtransformed_amount = j(num_records, num_episodic + num_daily, .);
		do i = 1 to num_episodic + num_daily;
		
			backtran_index = loc(tran_variable = variables[i]);
			
			if i <= num_episodic then do;
				amount_index = 2*i;
			end;
			else do;
				amount_index = num_episodic + i;
			end;
			
			backtransformed_amount[,i] = backtransform(xbeta_u[,amount_index],
																								 sigma_e_mean[amount_index, amount_index],
																								 tran_lambda[backtran_index],
																								 tran_center[backtran_index],
																								 tran_scale[backtran_index],
																								 minamount[backtran_index],
																								 biomarker[backtran_index]);
		end;
		
		**if never-consumers are allowed, set amounts for never-consumers to missing for the first episodic food;
		if has_never_consumers = 1 & num_episodic > 0 then do;
		
			if any(never_consumers) = 1 then do;
			
				backtransformed_amount[loc(never_consumers),1] = .;
			end;
		end;
		
		amount_names = cat("amt_", variables);
	finish;
	
	start calculate_usual_intake(usual_intake,
															 usual_intake_names,
															 consumption_probability,
															 backtransformed_amount,
															 episodic_variables,
															 daily_variables,
															 num_records,
															 num_episodic,
															 num_daily,
															 has_never_consumers,
															 never_consumers);
		
		**calculate usual intake for episodic and daily variables;
		usual_intake = j(num_records, num_episodic + num_daily, .);
		
		if num_episodic > 0 then do;
			
			usual_intake[,1:num_episodic] = consumption_probability # backtransformed_amount[,1:num_episodic];
		end;
		
		if num_daily > 0 then do;
			
			usual_intake[,(num_episodic+1):(num_episodic+num_daily)] = backtransformed_amount[,(num_episodic+1):(num_episodic+num_daily)];
		end;
		
		**if never-consumers are allowed, set usual intake for never-consumers to zero;
		if has_never_consumers = 1 & num_episodic > 0 then do;
		
			if any(never_consumers) = 1 then do;
			
				usual_intake[loc(never_consumers),1] = 0;
			end;
		end;
		
		variables = episodic_variables || daily_variables;
		usual_intake_names = cat("usl_", variables);
	finish;
	
	start calculate_supplemented_intake(supplemented_intake,
																			supplemented_names,
																			usual_intake,
																			dietary_supplement_data,
																			variables_to_supplement,
																			episodic_variables,
																			daily_variables,
																			num_records);
																			
		if ncol(dietary_supplement_data) > 0 then do;
		
			variables = episodic_variables || daily_variables;
			
			num_supplemented = ncol(dietary_supplement_data);
			
			supplemented_intake = j(num_records, num_supplemented, .);
			do i = 1 to num_supplemented;
			
				variable_index = loc(variables = variables_to_supplement[i]);
				supplemented_intake[,i] = usual_intake[,variable_index] + dietary_supplement_data[,i];
			end;
			
			supplemented_names = cat("sup_", variables_to_supplement);
		end;
		else do;
		
			supplemented_intake = {};
			supplemented_names = {};
		end;
	finish;
	
	**Calculates consumption amounts backtransformed to the original scale from a mixed model predictor of a Box-Cox transformed variable;
	start backtransform(xbeta_u_var,
											sigma_e_var,
											lambda,
											center,
											scale,
											minimum_amount,
											is_biomarker);
											
		if is_biomarker = 1 then do;
		
			boxcox_amounts = center + scale # xbeta_u_var;
			backtransformed_amounts = inverse_box_cox(boxcox_amounts, lambda, minimum_amount);
		end;
		else do;
		
			if lambda = 0 then do;
			
				boxcox_amounts = center + scale # xbeta_u_var + (scale**2)*sigma_e_var/2;
				backtransformed_amounts = inverse_box_cox(boxcox_amounts, 0, minimum_amount);
			end;
			else do;
			
				**perform 9-point approximation for non-zero lambda;
				x9pt = {-2.1, -1.3, -0.8, -0.5, 0, 0.5, 0.8, 1.3, 2.1};
				w9pt = {0.063345, 0.080255, 0.070458, 0.159698, 0.252489, 0.159698, 0.070458, 0.080255, 0.063345};
				w9pt = w9pt/sum(w9pt);
				
				backtransformed_amounts = j(nrow(xbeta_u_var), 1, 0);
				do i = 1 to 9;
				
					boxcox_amounts = center + scale # (xbeta_u_var + x9pt[i]*sqrt(sigma_e_var));
					backtransformed_amounts = backtransformed_amounts + w9pt[i] # inverse_box_cox(boxcox_amounts, lambda, minimum_amount);
				end;
			end;
		end;
		
		return backtransformed_amounts;
	finish;
	
	**Converts Box-Cox transformed values back to the original scale;
	start inverse_box_cox(boxcox_values,
												lambda,
												minimum_amount);
												
		if lambda = 0 then do;
		
			inverse_boxcox = exp(boxcox_values);
		end;
		else do;
		
			inverse_boxcox = ((1 + lambda # boxcox_values) <> 0) ## (1/lambda);
		end;
		
		inverse_boxcox = inverse_boxcox <> minimum_amount;
		
		return inverse_boxcox;
	finish;
	
	reset storage=_modules.distrib_modules;
	store module=(match
								generate_u_matrix
								find_never_consumers
								calculate_probability
								calculate_amount
								calculate_usual_intake
								calculate_supplemented_intake
								backtransform
								inverse_box_cox);
quit;