% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/BioTIP_update_3.3_02282020.R
\name{optimize.sd_selection}
\alias{optimize.sd_selection}
\title{Optimization of sd selection}
\usage{
optimize.sd_selection(
  df,
  samplesL,
  B = 100,
  percent = 0.8,
  times = 0.8,
  cutoff = 0.01,
  method = c("other", "reference", "previous", "itself", "longitudinal reference"),
  control_df = NULL,
  control_samplesL = NULL
)
}
\arguments{
\item{df}{A dataframe of numerics. The rows and columns
represent unique transcript IDs (geneID) and sample names, respectively.}

\item{samplesL}{A list of n vectors, where n equals the number of
states. Each vector gives the sample names in a state. Note that the vectors
(sample names) have to be among the column names of the R object 'df'.}

\item{B}{An integer indicating the number of times to run this optimization, default 100.}

\item{percent}{A numeric value indicating the fraction of samples that will
be selected in each round of simulation. Default is 0.8.}

\item{times}{A numeric value indicating the fraction of the \code{B} runs in which a transcript
needs to be selected in order to be considered a stable signature. Default is 0.8.}

\item{cutoff}{A positive numeric value. Default is 0.01. If < 1, the top x percent
of transcripts are automatically selected using the chosen method (which is
either the \code{reference}, \code{other} or \code{previous} stage), e.g. by
default the top 1 percent of the transcripts are selected.}

\item{method}{Selection of methods from \code{reference}, \code{other}, \code{previous},
\code{itself}, or \code{longitudinal reference}; default uses \code{other}. Partial match enabled.
Some specific requirements for each option:
\itemize{
\item \code{reference}, the reference has to be the first.
\item \code{previous}, make sure \code{samplesL} is in the right order from benign to malignant.
\item \code{itself}, make sure the cutoff is smaller than 1.
\item \code{longitudinal reference}, make sure \code{control_df} and \code{control_samplesL} are not NULL.
The number of rows of \code{control_df} must be the same as that of \code{df}, and all transcripts in \code{df} must also be in \code{control_df}.
}}

\item{control_df}{A count matrix with unique loci as row names and sample names
of control samples as column names, only used for method \code{longitudinal reference}.}

\item{control_samplesL}{A list of characters with stages as names of control
samples, required for method \code{longitudinal reference}.}
}
\value{
A list of dataframes of filtered transcripts; the transcripts with the highest
standard deviation are selected from \code{df} based on the assigned cutoff value.
Each resulting dataframe represents a subset of the raw input \code{df}.
}
\description{
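The \code{optimize.sd_selection} function filters a multi-state dataset
based on a cutoff value for standard deviation per state, and optimizes the selection
by repeating it over \code{B} rounds, each on a subset of the samples, keeping the
transcripts that are selected in at least the \code{times} fraction of the rounds.
By default, a cutoff value of 0.01 is used. Suggested if each state contains more than 10 samples.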
}
\seealso{
\code{\link{sd_selection}}
}
\author{
Zhezhen Wang \email{zhezhen@uchicago.edu}
}
