#Removes missing and negative data from input dataset and outputs it in a separate data frame.
remove_missing_and_negative <- function(input.data,
                                        episodic.variables,
                                        episodic.biomarkers,
                                        daily.variables,
                                        daily.biomarkers,
                                        continuous.covariates) {

  #convert input data to a data frame
  input.data <- as.data.frame(input.data)

  #Add biomarkers to variable lists
  episodic.variables <- c(episodic.variables, episodic.biomarkers[!(episodic.biomarkers %in% episodic.variables)])
  daily.variables <- c(daily.variables, daily.biomarkers[!(daily.biomarkers %in% daily.variables)])

  #Missing records for episodic variables, daily variables, and continuous covariates
  is.missing <- rep(FALSE, nrow(input.data))
  for(variable in c(episodic.variables, daily.variables, continuous.covariates)) {

    is.missing <- is.missing | is.na(input.data[,variable,drop=TRUE])
  }

  #Negative values for episodic variables and daily variables
  is.negative <- rep(FALSE, nrow(input.data))
  for(variable in c(episodic.variables, daily.variables)) {

    is.negative <- is.negative | (input.data[,variable,drop=TRUE] < 0)
  }

  #Output cleaned and excluded data
  cleaned.data <- input.data[!is.missing & !is.negative,]
  attr(cleaned.data, "excluded") <- input.data[is.missing | is.negative,]

  return(cleaned.data)
}

#Creates indicator variables, standardized amounts, and standardized covariates.
create_standardized_variables <- function(input.data,
                                          episodic.variables,
                                          episodic.biomarkers,
                                          daily.variables,
                                          daily.biomarkers,
                                          continuous.covariates,
                                          boxcox.lambda.data,
                                          minimum.amount.data) {

  #Add biomarkers to variable lists
  episodic.variables <- c(episodic.variables, episodic.biomarkers[!(episodic.biomarkers %in% episodic.variables)])
  daily.variables <- c(daily.variables, daily.biomarkers[!(daily.biomarkers %in% daily.variables)])

  num.episodic <- length(episodic.variables)
  num.daily <- length(daily.variables)

  #Initialize output data
  standardized.data <- input.data

  #create episodic indicators and Box-Cox transform and standardize episodic amounts to a mean of zero and a standard deviation of sqrt(2)
  center.episodic <- numeric(num.episodic)
  scale.episodic <- numeric(num.episodic)
  for(i in seq_len(num.episodic)) {

    variable.values <- input.data[,episodic.variables[i],drop=TRUE]
    lambda <- boxcox.lambda.data[boxcox.lambda.data$variable == episodic.variables[i], "tran_lambda"]

    indicator <- (variable.values > 0)
    boxcox.values <- ifelse(indicator, boxcox_transform(variable.values, lambda), NA)
    std.boxcox.values <- standardize(boxcox.values, mu=0, sigma=sqrt(2))

    standardized.data[,paste0("ind.", episodic.variables[i])] <- indicator
    standardized.data[,paste0("amt.", episodic.variables[i])] <- std.boxcox.values

    center.episodic[i] <- attr(std.boxcox.values, "center")
    scale.episodic[i] <- attr(std.boxcox.values, "scale")
  }

  #Box-Cox transform and standardize daily amounts to a mean of zero and a standard deviation of sqrt(2)
  center.daily <- numeric(num.daily)
  scale.daily <- numeric(num.daily)
  for(i in seq_len(num.daily)) {

    variable.values <- input.data[,daily.variables[i],drop=TRUE]
    lambda <- boxcox.lambda.data[boxcox.lambda.data$variable == daily.variables[i], "tran_lambda"]
    minamount <- minimum.amount.data[minimum.amount.data$variable == daily.variables[i], "minamount"]

    variable.values[variable.values < minamount] <- minamount
    boxcox.values <- boxcox_transform(variable.values, lambda)
    std.boxcox.values <- standardize(boxcox.values, mu=0, sigma=sqrt(2))

    standardized.data[,paste0("amt.", daily.variables[i])] <- std.boxcox.values

    center.daily[i] <- attr(std.boxcox.values, "center")
    scale.daily[i] <- attr(std.boxcox.values, "scale")
  }

  #standardize continuous covariates to mean of zero and a standard deviation of 1
  for(covariate in continuous.covariates) {

    covariate.values <- input.data[,covariate,drop=TRUE]
    std.covariate.values <- standardize(covariate.values, mu=0, sigma=1)

    standardized.data[,paste0("std.", covariate)] <- std.covariate.values
  }

  #output standardized data with standardization parameters
  center.scale.data <- data.frame(variable=c(episodic.variables, daily.variables),
                                  tran_center=c(center.episodic, center.daily),
                                  tran_scale=c(scale.episodic, scale.daily))
  attr(standardized.data, "center.scale") <- center.scale.data
  return(standardized.data)
}

#Box-Cox transformation function.
boxcox_transform <- function(variable, lambda) {

  if(lambda == 0) {

    box.cox.variable <- log(variable)
  } else {

    box.cox.variable <- (variable^lambda - 1)/lambda
  }

  return(box.cox.variable)
}

#Standardizes a variable to specific mean and standard deviation
standardize <- function(values, mu=0, sigma=1) {

  center <- mean(values, na.rm=TRUE)
  scale <- sd(values, na.rm=TRUE)/sigma

  std.values <- (values - center)/scale
  attr(std.values, "center") <- center
  attr(std.values, "scale") <- scale

  return(std.values)
}

#Creates dataset of metadata needed for backtransformation of intake amounts.
create_backtransformation_data <- function(boxcox.lambda.data,
                                           minimum.amount.data,
                                           center.scale.data,
                                           episodic.biomarkers,
                                           daily.biomarkers) {

  #merge backtransformation parameters into one dataset on the variable name and by-variable(s) if they exist
  backtran.parameter.list <- list(boxcox.lambda.data[,c("variable", "tran_lambda")], minimum.amount.data[,c("variable", "minamount")], center.scale.data)
  backtran.parameter.data <- Reduce(function(x,y) merge(x,y,by="variable"), backtran.parameter.list)

  #add column for biomarker status
  backtran.parameter.data$biomarker <- backtran.parameter.data$variable %in% c(episodic.biomarkers, daily.biomarkers)

  return(backtran.parameter.data)
}


#' Find minimum consumption amounts
#'
#' @description Calculate minimum non-zero amount of each food and nutrient on
#'   consumption days.
#'
#' @details For each food and nutrient a minimum amount of consumption is set at
#'   half of the smallest non-zero amount consumed. For nutrients, this amount
#'   replaces zero values so that the Box-Cox transformation is valid. For both
#'   foods and nutrients, the minimum amount is also used in
#'   [nci_multivar_distrib()] as a lower bound on the backtransformed amount
#'   consumed.
#'
#'
#' @param input.data A data frame.
#' @param row.subset Logical vector of the same length as `nrow(input.data)`
#'   indicating which rows of `input.data` to use for calculating the minimum
#'   amounts.
#' @param episodic.variables Vector of episodic variables.
#' @param daily.variables Vector of daily variables.
#'
#' @returns A data frame with the following variables:
#' * variable: The name of the variable.
#' * minamount: Half of the minimum non-zero amount consumed for the variable.
#'
#' @export
#'
#' @examples
#' #subset NHANES data
#' nhanes.subset <- nhcvd[nhcvd$SDMVSTRA %in% c(48, 60, 72),]
#'
#' minimum.amount.data <- calculate_minimum_amount(input.data=nhanes.subset,
#'                                                 row.subset=(nhanes.subset$DAY == 1),
#'                                                 episodic.variables="G_WHOLE",
#'                                                 daily.variables="TSODI")
#' minimum.amount.data
calculate_minimum_amount <- function(input.data,
                                     row.subset=NULL,
                                     episodic.variables=NULL,
                                     daily.variables=NULL) {

  #initial data processing
  if(!is.data.frame(input.data)) {

    input.data <- as.data.frame(input.data)
  }

  if(!is.null(row.subset)) {

    input.data <- input.data[row.subset,]
  }

  #calculate minimum amounts of each food and nutrient on consumption days
  if(!is.null(episodic.variables) && length(episodic.variables) > 0) {

    episodic.data <- input.data[,episodic.variables,drop=FALSE]
    episodic.minimum.amounts <- apply(episodic.data, 2, function(food) min(food[!is.na(food) & food > 0])/2)
  } else {

    episodic.minimum.amounts <- NULL
  }

  if(!is.null(daily.variables) && length(daily.variables) > 0) {

    daily.data <- input.data[,daily.variables,drop=FALSE]
    daily.minimum.amounts <- apply(daily.data, 2, function(nutrient) min(nutrient[!is.na(nutrient) & nutrient > 0])/2)
  } else {

    daily.minimum.amounts <- NULL
  }

  #create minimum amount dataset
  all.minimum.amounts <- c(episodic.minimum.amounts, daily.minimum.amounts)
  all.variables <- c(episodic.variables, daily.variables)
  minimum.amount.data <- data.frame(variable=all.variables, minamount=all.minimum.amounts)

  return(minimum.amount.data)
}
