% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/BreastSubtypeR_methods.R
\name{Mapping}
\alias{Mapping}
\title{Gene ID Mapping}
\usage{
Mapping(
  se_obj,
  RawCounts = FALSE,
  method = c("max", "mean", "median", "iqr", "stdev"),
  impute = TRUE,
  verbose = TRUE
)
}
\arguments{
\item{se_obj}{A \code{SummarizedExperiment} object containing:
\itemize{
\item \strong{Assay data}:
\itemize{
\item If \code{RawCounts = FALSE}: \code{assay()} must contain log2-normalized expression
(e.g., pre-normalized microarray/nCounter, or log2(FPKM+1) RNA-seq).
\item If \code{RawCounts = TRUE}: \code{assay()} contains raw RNA-seq counts (see \code{RawCounts}).
}
\item \strong{Row metadata} (required):
\itemize{
\item \code{"probe"}: feature identifiers (e.g., gene symbols or probe IDs)
\item \code{"ENTREZID"}: corresponding Entrez Gene IDs.
\item If row names are gene symbols, also supply those symbols as a row-metadata column
(e.g., a \code{"SYMBOL"} column renamed to \code{"probe"}).
}
\item \strong{Column metadata} (optional): sample-level metadata in \code{colData()}.
}}

\item{RawCounts}{Logical. If \code{TRUE}, indicates that \code{assay()} holds raw RNA-seq counts.
In this case, \code{rowData()} must also provide gene lengths
(column \code{"Length"}, in base pairs), used for:
\itemize{
\item NC-based methods: log2-CPM (upper-quartile normalization).
\item SSP-based methods: linear FPKM (not log-transformed).
}}

\item{method}{Strategy for resolving duplicate probes/genes. Options:
\itemize{
\item \code{"iqr"}: probe with highest interquartile range (short-oligo arrays, e.g., Affymetrix).
\item \code{"mean"}: probe with highest mean expression (long-oligo arrays, e.g., Agilent/Illumina).
\item \code{"max"}: probe with highest expression value (often used for RNA-seq).
\item \code{"stdev"}: probe with highest standard deviation.
\item \code{"median"}: probe with highest median expression.
}}

\item{impute}{Logical. If \code{TRUE}, applies KNN-based imputation to missing values.}

\item{verbose}{Logical. If \code{TRUE}, prints progress messages during execution.}
}
\value{
A named list with:
\describe{
\item{se_NC}{\code{SummarizedExperiment} holding log2-transformed data prepared for NC-based methods
(assay name: \code{counts}).}
\item{se_SSP}{\code{SummarizedExperiment} holding linear-scale data prepared for SSP-based methods
(assay name: \code{counts}).}
}
}
\description{
Preprocesses and maps gene expression input to prepare for
intrinsic subtyping workflows (NC- and SSP-based).
}
\details{
\code{Mapping()} supports multiple input types:
\itemize{
\item \strong{Raw RNA-seq counts} (with gene lengths): normalized to log2-CPM (NC) or linear FPKM (SSP).
\item \strong{Precomputed log2(FPKM+1)}: used directly for NC; back-transformed for SSP.
\item \strong{log2-normalized microarray/nCounter data}: used directly for NC; back-transformed for SSP.
}

This design allows users to supply a single expression format, while
BreastSubtypeR automatically applies method-specific preprocessing.
}
\examples{
if (requireNamespace("SummarizedExperiment", quietly = TRUE)) {
    # Using example raw RNA-seq counts (with gene lengths)
    data("TCGABRCAobj")
    se_obj_counts <- TCGABRCAobj$se_obj[, 1:3] # tiny subset to keep checks fast
    res <- Mapping(se_obj_counts, RawCounts = TRUE)

    # Using example pre-normalized log2(FPKM+0.1)
    data("OSLO2EMIT0obj")
    se_obj_fpkm <- OSLO2EMIT0obj$se_obj[, 1:3] # tiny subset to keep checks fast
    res <- Mapping(se_obj_fpkm, RawCounts = FALSE)
}

}
\references{
Yang Q, Hartman J, Sifakis EG.
\emph{BreastSubtypeR: A Unified R/Bioconductor Package for Intrinsic Molecular Subtyping in Breast Cancer Research.}
NAR Genomics and Bioinformatics. 2025. \doi{10.1093/nargab/lqaf131}. Selected as Editor's Choice.
}
