% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ReferenceMappingMethods.R
\name{ReferenceMapping}
\alias{ReferenceMapping}
\alias{ReferenceMapping.SingleCellExperiment}
\alias{ReferenceMapping,SingleCellExperiment,SingleCellExperiment-method}
\title{Reference mapping}
\usage{
ReferenceMapping.SingleCellExperiment(
  ref,
  query,
  ref.label,
  label.prune.cutoff,
  scale.query.by,
  project.umap,
  select.icp.models,
  k.nn,
  dimred.name.prefix
)

\S4method{ReferenceMapping}{SingleCellExperiment,SingleCellExperiment}(
  ref,
  query,
  ref.label,
  label.prune.cutoff = 0.5,
  scale.query.by = NULL,
  project.umap = FALSE,
  select.icp.models = metadata(ref)$coralysis$pca.params$select.icp.tables,
  k.nn = 10,
  dimred.name.prefix = ""
)
}
\arguments{
\item{ref}{A \code{SingleCellExperiment} object trained with Coralysis
on which \code{RunPCA(..., return.model = TRUE)} has already been run.}

\item{query}{An object of \code{SingleCellExperiment} class to project onto
\code{ref}.}

\item{ref.label}{A character cell metadata column name from the \code{ref}
object to transfer to the queries.}

\item{label.prune.cutoff}{A numeric cutoff value used to prune low-confidence
predicted cell labels, based on the confidence probability scores stored in the
\code{coral_probability} column of \code{colData}. The default is \code{0.5}, i.e.,
cell labels with confidence scores less than or equal to 0.5 are considered
unclassified and set to \code{NA}. The resulting pruned cell labels are stored in
\code{pruned_coral_labels}. Set to \code{0} to skip pruning.}

\item{scale.query.by}{Whether the query data should be scaled by \code{cell} or by
\code{feature}. By default \code{NULL}, i.e., the query is not scaled. Scale it if
the reference was scaled.}

\item{project.umap}{Project query data onto reference UMAP (logical). By
default \code{FALSE}. If \code{TRUE}, the \code{ref} object needs to have a
UMAP embedding obtained with \code{RunUMAP(..., return.model = TRUE)} function.}

\item{select.icp.models}{Select the reference ICP models to use for query
cluster probability prediction. By default \code{metadata(ref)$coralysis$pca.params$select.icp.tables},
i.e., the models selected to compute the reference PCA are used.
If \code{NULL}, all models are used. Otherwise, a numeric vector should be given
to select the ICP models of interest.}

\item{k.nn}{The number of nearest neighbors (\code{k}) used by the KNN
classification algorithm that transfers labels from the reference to queries
(integer). By default \code{10}.}

\item{dimred.name.prefix}{Dimensional reduction name prefix to add to the
computed PCA and UMAP. By default nothing is added, i.e.,
\code{dimred.name.prefix = ""}.}
}
\value{
An object of \code{SingleCellExperiment} class.
}
\description{
This function projects new query data sets onto a reference built with
Coralysis and transfers cell labels from the reference to the queries.
}
\examples{
# Import package (attached quietly to keep the example output clean)
suppressPackageStartupMessages(library("SingleCellExperiment"))

# Create toy SCE data: the four numeric iris measurements are transposed so
# that they become features (rows) and each iris observation becomes a cell
# (column); every cell is randomly assigned to one of two batches
batches <- c("b1", "b2")
set.seed(239)
batch <- sample(x = batches, size = nrow(iris), replace = TRUE)
sce <- SingleCellExperiment(
    assays = list(logcounts = t(iris[, 1:4])),
    colData = DataFrame(
        "Species" = iris$Species,
        "Batch" = batch
    )
)
# seq_len() rather than 1:n, which would count backwards for an empty object
colnames(sce) <- paste0("samp", seq_len(ncol(sce)))

# Create reference & query SCE objects: batch b1 is the reference and
# batch b2 is the query to be projected onto it
ref <- sce[, sce$Batch == "b1"]
query <- sce[, sce$Batch == "b2"]

# 1) Train the reference
set.seed(123)
ref <- RunParallelDivisiveICP(
    object = ref, k = 2, L = 25, C = 1,
    train.k.nn = 10, train.k.nn.prop = NULL,
    use.cluster.seed = FALSE,
    build.train.set = FALSE, ari.cutoff = 0.1,
    threads = 2, RNGseed = 1024
)
# 2) Compute reference PCA & UMAP
# return.model = TRUE is required by ReferenceMapping for the PCA, and for the
# UMAP whenever project.umap = TRUE is requested
ref <- RunPCA(ref, p = 5, return.model = TRUE, pca.method = "stats")
set.seed(123)
ref <- RunUMAP(ref, return.model = TRUE)

# Plot
PlotDimRed(object = ref, color.by = "Species", legend.nrow = 1)

# 3) Project & predict query cell labels
map <- ReferenceMapping(
    ref = ref, query = query, ref.label = "Species",
    project.umap = TRUE
)

# Confusion matrix: predictions (rows) x ground-truth (cols)
preds_x_truth <- table(map$coral_labels, map$Species)
print(preds_x_truth)

# Accuracy score: labels are compared cell by cell instead of summing the
# diagonal of the confusion matrix, because the diagonal is only meaningful
# when every ground-truth class also appears among the predictions
is_correct <- as.character(map$coral_labels) == as.character(map$Species)
acc <- mean(is_correct, na.rm = TRUE) * 100
print(paste0("Coralysis accuracy score: ", round(acc), "\%"))

# Visualize: ground-truth, prediction, confidence scores
cowplot::plot_grid(
    PlotDimRed(
        object = map, color.by = "Species",
        legend.nrow = 1
    ),
    PlotDimRed(
        object = map, color.by = "coral_labels",
        legend.nrow = 1
    ),
    PlotExpression(
        object = map, color.by = "coral_probability",
        color.scale = "viridis"
    ),
    ncol = 2, align = "vh"
)

}
\keyword{ICP}
\keyword{LIBLINEAR}
\keyword{clustering}
\keyword{iterative}
\keyword{logistic}
\keyword{projection}
\keyword{regression}
