% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/nci_multivar_distrib.R
\name{nci_multivar_distrib}
\alias{nci_multivar_distrib}
\title{Simulate Usual Intakes}
\usage{
nci_multivar_distrib(
  multivar.mcmc.model,
  distrib.population,
  id,
  weight = NULL,
  nuisance.weight = NULL,
  use.mcmc.u.matrices = FALSE,
  distrib.seed = NULL,
  num.simulated.u = 500,
  dietary.supplements = NULL,
  additional.output = NULL
)
}
\arguments{
\item{multivar.mcmc.model}{An \code{nci.multivar.mcmc} object.}

\item{distrib.population}{A data frame. Must contain all \code{covariates} and
\code{never.consumer.covariates} in \code{multivar.mcmc.model}.}

\item{id}{Variable that identifies each subject in \code{distrib.population}.}

\item{weight}{Variable with weighting for each subject in
\code{distrib.population}.}

\item{nuisance.weight}{Variable with the weighting for each nuisance variable
level in \code{distrib.population}.}

\item{use.mcmc.u.matrices}{Flag specifying whether previously saved post-MCMC
random effects ('u') matrix draws should be used instead of simulating new
ones. Used in regression calibration procedures, see "Regression
Calibration". (default = \code{FALSE})}

\item{distrib.seed}{Integer starting seed for the random number generator. If
\code{NULL}, uses a randomly generated integer from -10^7 to 10^7, exclusive.
(default = \code{NULL})}

\item{num.simulated.u}{Integer specifying the number of random effect ('u')
draws that should be simulated for each subject. Has no effect if
\code{use.mcmc.u.matrices} is \code{TRUE}. (default = \code{500})}

\item{dietary.supplements}{Named list of dietary supplement variable names.
The names of the list are the variables which the supplements should be
added to.}

\item{additional.output}{Vector of additional variables in
\code{distrib.population} to be included in the output dataset.}
}
\value{
An object of class \code{c("nci.multivar.distrib", "data.frame")} where
each row represents a simulated subject. Contains the following columns:
\itemize{
\item \code{replicate}: The sequence number for the simulated 'u' value used to simulate the intakes.
\item \code{id}: Identifier for each subject.
\item \code{weight}: Weighting for each subject.
\item Variables specified in \code{additional.output}.
\item \code{usual.intake} variables: The usual intake for each variable.
\item \code{supplemented.intake} variables: The usual intake plus the supplement amount for variables with dietary supplements.
\item \code{prob} variables: The long-term probability of consumption for episodic variables.
\item \code{amount} variables: The long-term amount consumed given consumption for episodic variables.
}

The object also contains the following attribute:
\itemize{
\item \code{distrib.seed}: The random number generator seed used to generate the results, see the \code{distrib.seed} parameter for details.
}
}
\description{
Simulates usual intakes for subjects that represent the
distribution of true usual intakes.
}
\details{
Usual intakes are simulated for each subject in the
\code{distrib.population} dataset for different values of the random effects
('u'). The \code{num.simulated.u} parameter determines how many values of 'u'
are simulated per subject. The simulated 'u' values are drawn from a
multivariate normal distribution with a mean of zero and
variance/covariance matrix equal to the posterior mean of \code{sigma.u} in
\code{multivar.mcmc.model}.

Optionally, saved values of 'u' drawn after the MCMC chain conditional on
the posterior means of the parameters can be used instead of simulating new
values of 'u'. This must be done when using regression calibration for
measurement error correction (see "Regression Calibration").

Simulated usual intakes are output for each variable in the MCMC model. The
usual intake for daily variables is simply the long-term average amount
consumed. Usual intakes for episodic variables are the product of the
long-term probability of consumption and the long-term amount consumed
given consumption. For episodic variables, the usual intake and the
separate probability and amount components are included in the output.
}
\section{Regression Calibration}{
 The simulated usual intakes from
\code{nci_multivar_distrib()} can be used in regression calibration for
measurement error correction. To do this, saved random effect ('u')
matrices conditional on the posterior means of the MCMC parameters must be
used in place of simulated values of 'u'. The values of \code{id} must match
with \code{mcmc.subjects} in \code{multivar.mcmc.model} to apply the saved post-MCMC
'u' values. New values of 'u' are simulated for unmatched observations. Use
\code{vignette("regression_calibration", package="ncimultivar")} for a full
regression calibration workflow.
}

\section{Nuisance Covariates}{
 Nuisance covariates are not of interest in the
final model, but they must be accounted for when simulating long-term
average intakes. For example, a dietary study with two recalls may want to
account for whether a recall is a subject's second one and whether the
recall was on a weekday or weekend. Nuisance covariates can be factored out
by creating a row in \code{distrib.population} for each level of the covariate
for every subject and creating a weighting variable with the weight for
each level. An example of this procedure can be found in the daily nutrient
analysis vignette.
}

\section{Dietary Supplements}{
 Dietary supplements can be added to the usual
intakes for one or more variables. This function uses the shrink-then-add
method, which means that the usual intake is calculated first and then the
supplement is added. If a variable has a dietary supplement associated with
it, a base usual intake and a supplemented intake are included in the
output.
}

\examples{
#subset NHANES data: keep only the rows of nhcvd whose SDMVSTRA value is 48, 60, or 72
nhanes.subset <- nhcvd[nhcvd$SDMVSTRA \%in\% c(48, 60, 72),]

#run boxcox_survey for the daily variable TSODI using only the rows where DAY is 1
#(presumably this finds the Box-Cox transformation parameter, see the boxcox_survey help page)
boxcox.sodium <- boxcox_survey(input.data=nhanes.subset,
                               row.subset=(nhanes.subset$DAY == 1),
                               variable="TSODI",
                               id="SEQN",
                               repeat.obs="DAY",
                               weight="WTDRD1",
                               covariates="RIDAGEYR")

#same call for G_WHOLE, which is flagged as episodic with is.episodic=TRUE
boxcox.g.whole <- boxcox_survey(input.data=nhanes.subset,
                                row.subset=(nhanes.subset$DAY == 1),
                                variable="G_WHOLE",
                                is.episodic=TRUE,
                                id="SEQN",
                                repeat.obs="DAY",
                                weight="WTDRD1",
                                covariates="RIDAGEYR")

#stack both results into the single object passed later as boxcox.lambda.data
boxcox.lambda.data <- rbind(boxcox.sodium, boxcox.g.whole)

#run calculate_minimum_amount for both variables on the rows where DAY is 1
minimum.amount.data <- calculate_minimum_amount(input.data=nhanes.subset,
                                                row.subset=(nhanes.subset$DAY == 1),
                                                episodic.variables="G_WHOLE",
                                                daily.variables="TSODI")

#build the MCMC input from the subset, the Box-Cox results, and the minimum amounts
pre.mcmc.data <- nci_multivar_preprocessor(input.data=nhanes.subset,
                                           episodic.variables="G_WHOLE",
                                           daily.variables="TSODI",
                                           continuous.covariates="RIDAGEYR",
                                           boxcox.lambda.data=boxcox.lambda.data,
                                           minimum.amount.data=minimum.amount.data)

#fit the MCMC model with 1000 iterations, 500 burn-in iterations, and a thinning value of 1
#(std.RIDAGEYR is presumably the standardized RIDAGEYR column created by the preprocessor,
#confirm against the nci_multivar_preprocessor help page)
mcmc.output <- nci_multivar_mcmc(pre.mcmc.data=pre.mcmc.data,
                                 id="SEQN",
                                 weight="WTDRD1",
                                 repeat.obs="DAY",
                                 episodic.variables="G_WHOLE",
                                 daily.variables="TSODI",
                                 default.covariates="std.RIDAGEYR",
                                 num.mcmc.iterations=1000,
                                 num.burn=500,
                                 num.thin=1)

#use first instance of each subject as population base
#(duplicated SEQN rows are dropped so each subject appears once)
mcmc.input.data <- pre.mcmc.data$mcmc.input
population.base <- mcmc.input.data[!duplicated(mcmc.input.data$SEQN),]

#simulate usual intakes with 100 u draws per subject and carry RIDAGEYR into the output
distrib.output <- nci_multivar_distrib(multivar.mcmc.model=mcmc.output,
                                       distrib.population=population.base,
                                       id="SEQN",
                                       weight="WTDRD1",
                                       num.simulated.u=100,
                                       additional.output="RIDAGEYR")

#show the first rows of the simulated dataset
head(distrib.output)
}
