% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/TCGA_Download_Preprocess.R
\name{TCGA_Preprocess_GeneExpression}
\alias{TCGA_Preprocess_GeneExpression}
\title{The TCGA_Preprocess_GeneExpression function}
\usage{
TCGA_Preprocess_GeneExpression(
  CancerSite,
  MAdirectories,
  mode = "Regular",
  doBatchCorrection = FALSE,
  batch.correction.method = "Seurat",
  MissingValueThresholdGene = 0.3,
  MissingValueThresholdSample = 0.1,
  cores = 1
)
}
\arguments{
\item{CancerSite}{character string indicating the TCGA cancer code.}

\item{MAdirectories}{character vector of directories containing the downloaded data. It can be the object returned by the TCGA_Download_GeneExpression function.}

\item{mode}{character string indicating the type of gene expression data to pre-process, e.g. whether the genes are miRNAs or lncRNAs. Should be either 'Regular', 'Enhancer', 'miRNA' or 'lncRNA'. This value should be consistent with the same parameter in the TCGA_Download_GeneExpression function. Default: 'Regular'.}

\item{doBatchCorrection}{logical indicating whether to perform batch effect correction. Default: FALSE.}

\item{batch.correction.method}{character string indicating the method used to perform batch correction. The value should be either 'Seurat' or 'Combat'. Default: 'Seurat'. Seurat is much faster than Combat.}

\item{MissingValueThresholdGene}{threshold for missing values per gene. Genes with a percentage of NAs greater than this threshold are removed. Default is 0.3.}

\item{MissingValueThresholdSample}{threshold for missing values per sample. Samples with a percentage of NAs greater than this threshold are removed. Default is 0.1.}

\item{cores}{integer indicating the number of cores to be used for performing batch correction with Combat. Default: 1.}
}
\value{
pre-processed gene expression data matrix.
}
\description{
Pre-processes gene expression data from TCGA.
}
\details{
Pre-processing includes eliminating samples and genes with too many NAs, imputing NAs, and performing batch correction. If the row names of the gene expression data are Ensembl ENSG or ENST identifiers, the function will convert them to human gene symbols (HGNC).
}
\examples{
\donttest{

# Example #1: Preprocessing gene expression for Regular mode

 GEdirectories <- TCGA_Download_GeneExpression(CancerSite = 'OV',
                                               TargetDirectory = tempdir())
 GEProcessedData <- TCGA_Preprocess_GeneExpression(CancerSite = 'OV',
                                                   MAdirectories = GEdirectories)

# Example #2: Preprocessing gene expression for miRNA mode

 GEdirectories <- TCGA_Download_GeneExpression(CancerSite = 'OV',
                                               TargetDirectory = tempdir(),
                                               mode = 'miRNA')

 GEProcessedData <- TCGA_Preprocess_GeneExpression(CancerSite = 'OV',
                                                   MAdirectories = GEdirectories,
                                                   mode = 'miRNA')

# Example #3: Preprocessing gene expression for lncRNA mode

 GEdirectories <- TCGA_Download_GeneExpression(CancerSite = 'OV',
                                               TargetDirectory = tempdir(),
                                               mode = 'lncRNA')

 GEProcessedData <- TCGA_Preprocess_GeneExpression(CancerSite = 'OV',
                                                   MAdirectories = GEdirectories,
                                                   mode = 'lncRNA')

}

}
\keyword{preprocess}
