% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/nci_multivar_preprocessor.R
\name{nci_multivar_preprocessor}
\alias{nci_multivar_preprocessor}
\title{Clean and prepare data for the MCMC model}
\usage{
nci_multivar_preprocessor(
  input.data,
  episodic.variables = NULL,
  episodic.biomarkers = NULL,
  daily.variables = NULL,
  daily.biomarkers = NULL,
  continuous.covariates = NULL,
  boxcox.lambda.data,
  minimum.amount.data
)
}
\arguments{
\item{input.data}{A data frame.}

\item{episodic.variables}{Vector of episodic variables.}

\item{episodic.biomarkers}{Vector of episodic biomarkers.}

\item{daily.variables}{Vector of daily variables.}

\item{daily.biomarkers}{Vector of daily biomarkers.}

\item{continuous.covariates}{Vector of continuous covariates.}

\item{boxcox.lambda.data}{A data frame with Box-Cox lambda parameters for
each variable. Must contain only the following columns:
\itemize{
\item variable: Name of the variable.
\item tran_lambda: Box-Cox transformation parameter to use for the variable.
}}

\item{minimum.amount.data}{A data frame with minimum consumption amounts for
each variable. Must contain only the following columns:
\itemize{
\item variable: Name of the variable.
\item minamount: Minimum amount consumed for the variable.
}}
}
\value{
An \code{nci.multivar.preprocessor} object containing the following
elements:
\itemize{
\item mcmc.input: A data frame with all of the columns of \code{input.data} plus any created indicator variables and standardized variables and covariates.
\item backtransformation: A data frame with the following columns:
\itemize{
\item variable: The name of the variable.
\item tran_lambda: The Box-Cox lambda used to transform the variable.
\item minamount: The minimum allowed usual intake, defined as half of the smallest non-zero intake in the observed data.
\item tran_center: The mean of the Box-Cox transformed variable before standardization.
\item tran_scale: The standard deviation of the Box-Cox transformed variable before standardization, divided by \code{sqrt(2)} so that the standardized variable has a variance of 2.
\item biomarker: Logical flag indicating whether the variable is a biomarker assumed to be unbiased on the transformed scale.
If \code{FALSE}, a bias correction factor will be added and a 9-point approximation will be used for backtransformation.
If \code{TRUE}, an exact backtransformation will be used with no correction.
}
}
}
\description{
Removes missing and negative values from variables, makes
indicators for episodic variables, and standardizes variables and
covariates.
}
\details{
For episodic variables, the MCMC requires a separate indicator and
amount to model the probability of consumption and the amount consumed. The
variable values for both indicators and amounts are Box-Cox transformed,
then standardized to a mean of 0 and variance of 2. Continuous covariates
are standardized to a mean of 0 and a variance of 1. This standardization
is done to meet the assumptions of the MCMC laid out in Zhang et al.
(2011). Indicator variables have the prefix \code{ind.}, standardized amounts
have the prefix \code{amt.}, and standardized covariates have the prefix \code{std.}.
}
\examples{
#subset NHANES data to the rows with SDMVSTRA equal to 48, 60, or 72
nhanes.subset <- nhcvd[nhcvd$SDMVSTRA \%in\% c(48, 60, 72),]

#Box-Cox lambda for sodium (TSODI), using only the rows where DAY is 1
boxcox.sodium <- boxcox_survey(input.data=nhanes.subset,
                               row.subset=(nhanes.subset$DAY == 1),
                               variable="TSODI",
                               id="SEQN",
                               repeat.obs="DAY",
                               weight="WTDRD1",
                               covariates="RIDAGEYR")

#Box-Cox lambda for whole grains (G_WHOLE), flagged as episodic
boxcox.g.whole <- boxcox_survey(input.data=nhanes.subset,
                                row.subset=(nhanes.subset$DAY == 1),
                                variable="G_WHOLE",
                                is.episodic=TRUE,
                                id="SEQN",
                                repeat.obs="DAY",
                                weight="WTDRD1",
                                covariates="RIDAGEYR")

#combine both results into the boxcox.lambda.data input
boxcox.lambda.data <- rbind(boxcox.sodium, boxcox.g.whole)

#minimum amounts for both variables, using only the rows where DAY is 1
minimum.amount.data <- calculate_minimum_amount(input.data=nhanes.subset,
                                                row.subset=(nhanes.subset$DAY == 1),
                                                episodic.variables="G_WHOLE",
                                                daily.variables="TSODI")

#run the preprocessor with G_WHOLE as episodic, TSODI as daily,
#and RIDAGEYR as a continuous covariate
pre.mcmc.data <- nci_multivar_preprocessor(input.data=nhanes.subset,
                                           episodic.variables="G_WHOLE",
                                           daily.variables="TSODI",
                                           continuous.covariates="RIDAGEYR",
                                           boxcox.lambda.data=boxcox.lambda.data,
                                           minimum.amount.data=minimum.amount.data)

#indicator and amount names stored on the returned object
pre.mcmc.data$amount.indicator.names

#first values of the indicator for whole grains (ind. prefix)
head(pre.mcmc.data$mcmc.input$ind.G_WHOLE)

#amounts (amt. prefix) are standardized to mean 0 and variance 2
mean(pre.mcmc.data$mcmc.input$amt.TSODI)
var(pre.mcmc.data$mcmc.input$amt.TSODI)

#same check for whole grains, ignoring missing amounts
mean(pre.mcmc.data$mcmc.input$amt.G_WHOLE, na.rm=TRUE)
var(pre.mcmc.data$mcmc.input$amt.G_WHOLE, na.rm=TRUE)

#backtransformation data (lambda, minimum amount, center, scale, biomarker flag)
pre.mcmc.data$backtransformation
}
