% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/upsample.R
\name{tof_upsample}
\alias{tof_upsample}
\title{Upsample cells into the closest cluster in a reference dataset}
\usage{
tof_upsample(
  tof_tibble,
  reference_tibble,
  reference_cluster_col,
  upsample_cols = where(tof_is_numeric),
  ...,
  augment = TRUE,
  method = c("distance", "neighbor")
)
}
\arguments{
\item{tof_tibble}{A `tibble` or `tof_tbl` containing cells to be upsampled
into their nearest reference subpopulation.}

\item{reference_tibble}{A `tibble` or `tof_tbl` containing cells that have
already been clustered or manually gated into subpopulations.}

\item{reference_cluster_col}{An unquoted column name indicating which column in
`reference_tibble` contains the subpopulation label (or cluster id) for
each cell in `reference_tibble`.}

\item{upsample_cols}{Unquoted column names indicating which columns in `tof_tibble` to
use in computing the distances used for upsampling. Defaults to all numeric columns
in `tof_tibble`. Supports tidyselect helpers.}

\item{...}{Additional arguments to pass to the `tof_upsample_*`
function family member corresponding to the chosen method.}

\item{augment}{A boolean value indicating if the output should column-bind the
cluster ids of each cell as a new column in `tof_tibble` (TRUE, the default) or if
a single-column tibble including only the cluster ids should be returned (FALSE).}

\item{method}{A string indicating which upsampling method should be used. Valid
values include "distance" (default) and "neighbor".}
}
\value{
A `tof_tbl` or `tibble`. If augment = FALSE, it will have a single column encoding
the upsampled cluster ids for each cell in `tof_tibble`.
If augment = TRUE, it will have
ncol(tof_tibble) + 1 columns: each of the (unaltered) columns in `tof_tibble`
plus an additional column encoding the cluster ids.
}
\description{
This function performs distance-based upsampling on CyTOF data
by sorting single cells (passed into the function as `tof_tibble`) into
their most phenotypically similar cell subpopulation in a reference dataset
(passed into the function as `reference_tibble`). It does so by calculating
the distance (either Mahalanobis, cosine, or Pearson) between each cell in
`tof_tibble` and the centroid of each cluster in `reference_tibble`, then
sorting cells into the cluster corresponding to their closest centroid.
}
\examples{
# Simulate the single-cell data to be upsampled, along with a labeled
# reference dataset whose clusters the cells will be upsampled into.
sim_data <- dplyr::tibble(
    cd45 = rnorm(1000),
    cd38 = rnorm(1000),
    cd34 = rnorm(1000),
    cd19 = rnorm(1000)
)
reference_data <- dplyr::tibble(
    cd45 = rnorm(200),
    cd38 = rnorm(200),
    cd34 = rnorm(200),
    cd19 = rnorm(200),
    # the first 100 reference cells are labeled "a", the last 100 "b"
    cluster_id = rep(c("a", "b"), each = 100)
)

# Method "distance": upsample using the distance to cluster centroids
tof_upsample(
    tof_tibble = sim_data, reference_tibble = reference_data,
    reference_cluster_col = cluster_id, method = "distance"
)

# Method "neighbor": upsample using the distance to the nearest neighbor
tof_upsample(
    tof_tibble = sim_data, reference_tibble = reference_data,
    reference_cluster_col = cluster_id, method = "neighbor"
)

}
