/* MERGENOBY=NOWARN: do not write a log message when a MERGE statement is used without a BY statement. */
options mergenoby=nowarn;

/*****************************************************************************/
/*****************************************************************************/
/* The following files are required to run this example:                     */
/*   nlmixed_univariate_macro_v1.2.sas                                       */
/*   simulated_example_data.sas7bdat                                         */
/*****************************************************************************/
/*                                                                           */
/*****************************************************************************/
/* This example analysis uses regression calibration and fits a logistic     */
/* regression model to assess the relationships between two dietary          */
/* components and a health outcome.  For this analysis, the example data     */
/* were simulated to include data from 100,000 participants, and the 24-hour */
/* dietary recall is the main dietary instrument.  The dietary components    */
/* are red meat and energy, and the health outcome is a binary event         */
/* variable.  The simulated data include repeated intake measurements from   */
/* 24-hour dietary recalls.                                                  */
/*                                                                           */
/* This program fits univariate measurement error models for red meat and    */
/* energy to obtain starting values needed to fit a bivariate measurement    */
/* error model in a subsequent analysis step.                                */
/*                                                                           */
/* This analysis uses bootstrap variance estimation, so the bivariate        */
/* measurement error model and logistic model are fit using the original     */
/* data set and 200 replicate data sets.  The 200 replicate data sets are    */
/* generated using the SAS Surveyselect procedure, and these 200 replicate   */
/* data sets are combined with the original data set and are saved for later */
/* use.                                                                      */
/*****************************************************************************/



/* Define report title lines 1-3. The macro calls later in this program pass ntitle = 3 to match this count. */
title1 "Fit Univariate and Bivariate Measurement Error Models Using MLE with 24-Hour Recall as Main Instrument";
title2 "Predict Intake and Perform Regression Calibration";
title3 "Assess Relationships between Two Dietary Components and a Health Outcome";



***********************************************************************;
*** The path must be modified in the following lines to specify the ***;
*** location of the SAS macros, the SAS data files, and the SAS     ***;
*** library that will be used for output                            ***;
***********************************************************************;

/* Root directory of the example files. Every path below is built from the home macro variable. */
%let home = /prj/dcp/statprog/meas.err/develop.public.resources.stat.meth;

*** Include the required macros ***;
/* NOTE(review): this file is expected to define the nlmixed_univariate macro invoked later in this program. */
%include "&home/include.files.macros/nlmixed_univariate_macro_v1.2.sas";

*** Input data library ***;
/* Source of inlib.simulated_example_data, which is read below. */
libname inlib "&home/simdata";

*** Output data library ***;
/* Receives simdata_and_bootdata and the two _init_param_ data sets saved below. */
libname outlib "&home/bivar_epidemiology_example2_mle_main24hr/outlib";



*********************************************************************************;
*** Import the simulated data file for this example and set it as replicate 0 ***;
*********************************************************************************;

/* Copy the input data into simdata and tag every record as replicate 0 (the original, non-bootstrap data). */
data simdata;
  set inlib.simulated_example_data;
  replicate = 0;  /* bootstrap replicates created below are combined with these records */
run;



**************************************************************************************************;
*** Use proc surveyselect to generate 200 replicate bootstrap data sets for the simulated data ***;
**************************************************************************************************;

/* Draw 200 bootstrap replicates from the original data.                            */
/*   drop = replicate : the replicate = 0 marker is removed from the input so that  */
/*                      the replicate index written by the procedure (reps=) is the */
/*                      only replicate variable in bootdata.                        */
/*   method=urs       : unrestricted random sampling, i.e. with replacement.        */
/*   samprate=1       : each replicate has as many selections as the input data set */
/*                      has records, so the sample size is not hard-coded and stays */
/*                      correct if the input data set changes size.                 */
/*   outhits          : write one output record per selection, so a record chosen   */
/*                      k times appears k times.                                    */
/*   seed=            : fixed seed so the replicates are reproducible.              */
proc surveyselect data=simdata(drop = replicate)
                  out=bootdata
                  seed=1563783
                  method=urs
                  samprate=1
                  outhits
                  reps=200;
run;



**********************************************************;
*** Combine the simulated data with the bootstrap data ***;
**********************************************************;

/* Stack the original data (replicate 0) on top of the bootstrap replicates in a single data set. */
data simdata_and_bootdata;
  set simdata bootdata;
run;



************************************************************************;
*** Create the unique record ID variable for each replicate data set ***;
************************************************************************;

/* Order the records by replicate so that BY-group processing can be used below. */
proc sort data=simdata_and_bootdata;
  by replicate;
run;

/* Number the records 1, 2, 3, ... within each replicate. */
data simdata_and_bootdata;
  set simdata_and_bootdata;
  by replicate;

  /* Keep the counter from one record to the next instead of resetting it to missing. */
  retain replicaterowid;

  /* Restart the counter at the first record of each replicate, then count this record. */
  if first.replicate then replicaterowid = 0;
  replicaterowid = replicaterowid + 1;
run;



**************************************************;
*** Save the simulated data and bootstrap data ***;
**************************************************;

/* Write the combined original-plus-bootstrap data set to the output library. */
data outlib.simdata_and_bootdata;
  set simdata_and_bootdata;
run;



***********************************************************;
*** Create a data set for the NLMIXED_UNIVARIATE macro. ***;
*** The input data has one record per person.           ***;
*** The NLMIXED_UNIVARIATE macro uses a data set that   ***;
*** includes one or more observations for each person.  ***;
***********************************************************;

/* Reshape simdata from one record per person to one record per person per    */
/* 24-hour recall. A recall is written out only when at least one of its two  */
/* intake values (red meat, energy) is nonmissing.                            */
data datamrec(keep = simrowid r_redmeat_g r_energy_kcal repeat ybin_event std_entry_age
                     std_log_bmi std_boxcox_ffq_redmeat_g std_boxcox_ffq_energy_kcal);

  set simdata;

  /* Element k of each array holds the value reported on the k-th 24-hour recall. */
  array recall_redmeat_k{2} r1_redmeat_g   r2_redmeat_g;
  array recall_energy_k{2}  r1_energy_kcal r2_energy_kcal;

  /********************************************************/
  /* Output one record for each 24-hour recall with data  */
  /********************************************************/

  do repeat = 1 to 2;

    if n(recall_redmeat_k{repeat}, recall_energy_k{repeat}) > 0 then do;

      r_redmeat_g   = recall_redmeat_k{repeat};
      r_energy_kcal = recall_energy_k{repeat};

      output;

    end;

  end;

run;



/****************************************************************************
*                                                                           *
* SAS macro NLMixed_Univariate fits a univariate model for a food/nutrient. *
* The food/nutrient can be episodically consumed or consumed every day.     *
*                                                                           *
* Model for episodically consumed foods/nutrients (two-part model):         *
* For episodically consumed foods/nutrients, the macro fits a two-part      *
* nonlinear mixed model, where the first part is the probability to         *
* consume and the second part is the amount consumed on a consumption day.  *
* The model allows for covariates in each part, includes a random effect    *
* for each part, and allows the random effects to be correlated.            *
*                                                                           *
* Model for foods/nutrients consumed every day (one-part model):            *
* For foods/nutrients consumed every day, the macro fits a one-part         *
* nonlinear mixed model of the amount consumed (the probability to consume  *
* is assumed to be 1). The model allows for covariates and includes a       *
* random effect.                                                            *
*                                                                           *
* For a food/nutrient that is consumed nearly every day by nearly everyone, *
* so that the number of zero values is small, it may be preferable to use   *
* the one-part (consumed every day) model, since the two-part model may     *
* have trouble modeling the probability to consume in such a situation.     *
*                                                                           *
* Note, however, that the one-part model requires all responses to be       *
* greater than zero (zero values are treated as missing values).            *
* Before fitting the one-part model to a food/nutrient that has some zero   *
* values, replace the zero values with a small positive value, such as      *
* half the smallest observed nonzero value.                                 *
*                                                                           *
* The macro calls the NLMixed procedure to fit the model.                   *
*                                                                           *
*****************************************************************************
*                                                                           *
* Macro Parameters:                                                         *
*                                                                           *
*    Required Parameters:                                                   *
*       data          = name of SAS data set containing the data to be      *
*                       analyzed. The data set has multiple observations    *
*                       for each subject, one for each repetition of the    *
*                       24-hour recall (or other dietary instrument).       *
*       subject       = name of the variable that uniquely identifies each  *
*                       subject (i.e., ID variable).                        *
*       repeat        = name of the variable that indexes repeated          *
*                       observations for each subject.                      *
*       response      = name of the food/nutrient variable to be modeled    *
*                       (24-hour recall variable for the food/nutrient).    *
*       modeltype     = model for food/nutrient:                            *
*                       to fit the two-part (episodic) model, specify       *
*                          modeltype = TWOPART                              *
*                       to fit the one-part (every day) model, specify      *
*                          modeltype = ONEPART                              *
*                                                                           *
*    Optional Parameters:                                                   *
*       covars_prob   = list of variables that are covariates in the        *
*                       probability part of the two-part model.             *
*                       if modeltype=ONEPART, then covars_prob is ignored.  *
*       covars_amt    = list of variables that are covariates in the        *
*                       one-part model or the amount part of the            *
*                       two-part model.                                     *
*       link          = link function for the probability part of the two-  *
*                       part model. to fit a logistic model, specify        *
*                          link = logit                                     *
*                       to fit a probit model, specify                      *
*                          link = probit                                    *
*                       by default, link = probit.                          *
*                       if modeltype = ONEPART, then link is ignored.       *
*       lambda        = Box-Cox transformation parameter for the amount     *
*                       part of the model. If lambda is not specified,      *
*                       then it is estimated as part of the model.          *
*       var_u1        = variance of the random effect in the probability    *
*                       part of the two-part model.                         *
*                       If var_u1 is not specified, then it is estimated    *
*                       as part of the model.                               *
*                       if modeltype = ONEPART, then var_u1 is ignored.     *
*       var_u2        = variance of the random effect in the one-part model *
*                       or the amount part of the two-part model.           *
*                       If var_u2 is not specified, then it is estimated    *
*                       as part of the model.                               *
*       indep_u       = Y if random effects u1 and u2 are independent.      *
*                     = N if random effects u1 and u2 are dependent.        *
*                       by default, indep_u = N.                            *
*                       if modeltype = ONEPART, then indep_u is ignored.    *
*       replicate_var = name of the sampling weight variable if the data    *
*                       is from a complex survey with weights.              *
*                       by default, the macro performs an unweighted        *
*                       analysis (assumes a simple random sample).          *
*       nloptions     = options for the NLMixed procedure that are          *
*                       appended to the PROC NLMIXED statement, e.g.,       *
*                          nloptions = technique=newrap maxiter=200,        *
*       init_parms    = name of SAS data set that contains initial          *
*                       parameter estimates. See the description of output  *
*                       data set parms_u (below) for further information.   *
*                       if init_parms is not specified, then the macro      *
*                       calculates initial parameter estimates.             *
*       print         = Y to print the output from the model.               *
*                     = N to suppress printing the output from the model.   *
*                     = V (verbose) to print extra output.                  *
*                       by default, print = Y.                              *
*       ntitle        = number of titles defined by the user.               *
*                       by default, ntitle = 2.                             *
*                                                                           *
*****************************************************************************
*                                                                           *
* Output Data Sets:                                                         *
*                                                                           *
*   parms_u = data set containing parameter estimates for the model.        *
*             parms_u contains the following variables:                     *
*                                                                           *
*                 A_Intercept = intercept in the amount part of the model.  *
*                 A_varname   = regression slope for covariate "varname"    *
*                               in the amount part of the model.            *
*                 A_LogSDe    = Log(Sqrt(Var_e))                            *
*                 LogSDu2     = Log(Sqrt(Var_u2))                           *
*                 Var_e       = variance of the within-person error in the  *
*                               amount part of the model.                   *
*                 Var_u2      = variance of the random effect in the        *
*                               amount part of the model.                   *
*                                                                           *
*             if fitting the two-part model, then parms_u also contains     *
*             the following variables:                                      *
*                                                                           *
*                 P_Intercept = intercept in the prob. part of the model.   *
*                 P_varname   = regression slope for covariate "varname"    *
*                               in the prob. part of the model.             *
*                 LogSDu1     = Log(Sqrt(Var_u1))                           *
*                 z_u1u2      = Fisher transformation of Corr_u1u2:         *
*                                  z = ln[(1+corr)/(1-corr)] / 2            *
*                 Var_u1      = variance of the random effect in the        *
*                               prob. part of the model.                    *
*                 Cov_u1u2    = covariance of random effects u1 and u2.     *
*                 Corr_u1u2   = correlation of random effects u1 and u2.    *
*                                                                           *
*             note: if specifying initial parameter estimates using the     *
*                   init_parms option, the init_parms data set should have  *
*                   the same variables as parms_u, except it should not     *
*                   include var_e, var_u2, var_u1, cov_u1u2 or corr_u1u2    *
*                   (these are derived parameters, i.e., functions of the   *
*                    other parameters).                                     *
*                                                                           *
*   pred_x_u = data set containing predicted values for the model.          *
*              pred_x_u contains all the variables in the input data set,   *
*              plus the following variable:                                 *
*                                                                           *
*                 pred_x_a = predicted mean amount on consumption day.      *
*                                                                           *
*              if fitting the two-part model, then pred_x_u also contains   *
*              the following variable:                                      *
*                                                                           *
*                  pred_x_p = predicted probability of consumption.         *
*                                                                           *
****************************************************************************/

**************************************************************************************;
*** Call NLMIXED_UNIVARIATE to fit a univariate nonlinear mixed model for red meat ***;
**************************************************************************************;

/* Red meat is fit with the two-part model (probability to consume plus amount     */
/* consumed), using the same four covariates in both parts and a probit link.      */
/* lambda, var_u1 and var_u2 are left blank so that, per the macro documentation,  */
/* they are estimated as part of the model. indep_u = N lets the two random        */
/* effects be correlated. replicate_var, nloptions and init_parms are left blank,  */
/* so the macro falls back to its documented defaults (unweighted analysis, macro- */
/* calculated starting values). ntitle = 3 matches title1-title3 defined above.    */
%nlmixed_univariate(data          = datamrec,
                    subject       = simrowid,
                    repeat        = repeat,
                    response      = r_redmeat_g,
                    modeltype     = TWOPART,
                    covars_prob   = std_boxcox_ffq_redmeat_g std_boxcox_ffq_energy_kcal std_entry_age std_log_bmi,
                    covars_amt    = std_boxcox_ffq_redmeat_g std_boxcox_ffq_energy_kcal std_entry_age std_log_bmi,
                    link          = PROBIT,
                    lambda        = ,
                    var_u1        = ,
                    var_u2        = ,
                    indep_u       = N,
                    replicate_var = ,
                    nloptions     = ,
                    init_parms    = ,
                    print         = Y,
                    ntitle        = 3
                    );



**********************************************;
*** Save and print the parameter estimates ***;
**********************************************;

/* parms_u is the parameter-estimate data set documented as output of the macro.   */
/* Copy it to the output library under a red-meat-specific name.                   */
/* NOTE(review): the later macro call for energy presumably recreates parms_u, so  */
/* this copy has to be taken before that call - confirm against the macro source.  */
data outlib._init_param_redmeat;
  set parms_u;
run;

/* Print the estimates. title4 adds a fourth title line below title1-title3. */
proc print data=parms_u;
  title4 "Parameter Estimates for Red Meat";
run;



************************************************************************************;
*** Call NLMIXED_UNIVARIATE to fit a univariate nonlinear mixed model for energy ***;
************************************************************************************;


/* Energy is fit with the one-part (consumed every day) model of the amount        */
/* consumed. Per the macro documentation, covars_prob, link, var_u1 and indep_u    */
/* are ignored when modeltype = ONEPART, so the values given for them here have no */
/* effect. lambda and var_u2 are left blank so that they are estimated as part of  */
/* the model. replicate_var, nloptions and init_parms are left blank, so the macro */
/* falls back to its documented defaults. ntitle = 3 matches title1-title3.        */
%nlmixed_univariate(data          = datamrec,
                    subject       = simrowid,
                    repeat        = repeat,
                    response      = r_energy_kcal,
                    modeltype     = ONEPART,
                    covars_prob   = ,
                    covars_amt    = std_boxcox_ffq_redmeat_g std_boxcox_ffq_energy_kcal std_entry_age std_log_bmi,
                    link          = ,
                    lambda        = ,
                    var_u1        = ,
                    var_u2        = ,
                    indep_u       = N,
                    replicate_var = ,
                    nloptions     = ,
                    init_parms    = ,
                    print         = Y,
                    ntitle        = 3
                    );



**********************************************;
*** Save and print the parameter estimates ***;
**********************************************;

/* parms_u is the parameter-estimate data set documented as output of the macro.   */
/* Copy it to the output library under an energy-specific name.                    */
data outlib._init_param_energy;
  set parms_u;
run;

/* Print the estimates. title4 replaces the fourth title line with the energy label. */
proc print data=parms_u;
  title4 "Parameter Estimates for Energy";
run;
