% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/stats.R
\name{normalize_data}
\alias{normalize_data}
\title{Normalize expression data from a SummarizedExperiment or matrix
to be used in \code{hipathia}}
\usage{
normalize_data(
  data,
  sel_assay = 1,
  by_quantiles = FALSE,
  by_gene = FALSE,
  percentil = FALSE,
  truncation_percentil = NULL
)
}
\arguments{
\item{data}{Either a SummarizedExperiment or a matrix of gene expression.}

\item{sel_assay}{Character or integer, indicating the assay to be normalized
in the SummarizedExperiment. Default is 1.}

\item{by_quantiles}{Boolean, whether to normalize the data by quantiles.
Default is FALSE.}

\item{by_gene}{Boolean, whether to transform the range of each row of the
matrix to [0,1]. Default is FALSE.}

\item{percentil}{Boolean, whether to take as value the percentile of each
sample in the corresponding distribution. Default is FALSE.}

\item{truncation_percentil}{Real number p in [0,1]. When provided, values
beyond percentile p are truncated to the value of percentile p, and values
beyond percentile 1-p are truncated to the value of percentile 1-p. By
default (NULL) no truncation is performed.}
}
\value{
Matrix of gene expression whose values are in [0,1].
}
\description{
Transforms the range of the SummarizedExperiment or matrix of gene expression
to [0,1] in order
to be processed by \code{hipathia}. The transformation may be performed
in two different ways. If \code{percentil = FALSE}, the transformation
is a re-scaling of the range of the matrix. If \code{percentil = TRUE},
the transformation is performed by assigning to each cell its percentile in
the corresponding distribution. This option is recommended for
distributions with very long tails.
}
\details{
This transformation may be applied either to the whole matrix
(by setting \code{by_gene = FALSE}), which we strongly recommend, or to
each of the rows (by setting \code{by_gene = TRUE}), allowing each gene
to have its own scale.

A prior quantile normalization may be applied by setting
\code{by_quantiles = TRUE}. This is recommended for noisy data.

For distributions with extreme outlier values, a percentile \code{p}
may be given to the parameter \code{truncation_percentil}. When provided,
values beyond percentile p are truncated to the value of percentile p, and
values beyond percentile 1-p are truncated to the value of percentile 1-p.
This step is performed before any other transformation. By default no
truncation is performed.
}
\examples{
data("brca_data")
trans_data <- translate_data(brca_data, "hsa")
exp_data <- normalize_data(trans_data)
exp_data <- normalize_data(trans_data, by_quantiles = TRUE,
truncation_percentil=0.95)

}
