% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/upsample.R
\name{tof_upsample_neighbor}
\alias{tof_upsample_neighbor}
\title{Upsample cells into the cluster of their nearest neighbor in a reference dataset}
\usage{
tof_upsample_neighbor(
  tof_tibble,
  reference_tibble,
  reference_cluster_col,
  upsample_cols = where(tof_is_numeric),
  num_neighbors = 1L,
  distance_function = c("euclidean", "cosine", "l2", "ip")
)
}
\arguments{
\item{tof_tibble}{A `tibble` or `tof_tbl` containing cells to be upsampled
into their nearest reference subpopulation.}

\item{reference_tibble}{A `tibble` or `tof_tbl` containing cells that have
already been clustered or manually gated into subpopulations.}

\item{reference_cluster_col}{An unquoted column name indicating which column in
`reference_tibble` contains the subpopulation label (or cluster id) for
each cell in `reference_tibble`.}

\item{upsample_cols}{Unquoted column names indicating which columns in `tof_tibble` to
use in computing the distances used for upsampling. Defaults to all numeric columns
in `tof_tibble`. Supports tidyselect helpers.}

\item{num_neighbors}{An integer indicating how many neighbors should be used
in the nearest neighbor calculation. Clusters are assigned based on majority
vote.}

\item{distance_function}{A string indicating which distance function should
be used to perform the upsampling. Options are "euclidean" (the default),
"cosine", "l2", and "ip".}
}
\value{
A tibble with one column named
`.upsample_cluster`, a character vector of length `nrow(tof_tibble)`
indicating the id of the reference cluster to which each cell
(i.e. each row) in `tof_tibble` was assigned.
}
\description{
This function performs upsampling on CyTOF data
by sorting single cells (passed into the function as `tof_tibble`) into
their most phenotypically similar cell subpopulation in a reference dataset
(passed into the function as `reference_tibble`). It does so by finding
each cell in `tof_tibble`'s nearest neighbor in `reference_tibble` and assigning
it to the cluster to which its nearest neighbor belongs. The nearest neighbor
calculation can be performed with euclidean distance (the default), cosine
distance, or any other option accepted by `distance_function`.
}
\examples{

# simulate single-cell data to be upsampled (1000 cells, 4 markers), and
# reference data with clusters to upsample into
sim_data <-
    dplyr::tibble(
        cd45 = rnorm(n = 1000),
        cd38 = rnorm(n = 1000),
        cd34 = rnorm(n = 1000),
        cd19 = rnorm(n = 1000)
    )

# reference cells: 200 rows with the same 4 markers, where cluster_id is
# a for the first 100 rows and b for the last 100 rows
reference_data <-
    dplyr::tibble(
        cd45 = rnorm(n = 200),
        cd38 = rnorm(n = 200),
        cd34 = rnorm(n = 200),
        cd19 = rnorm(n = 200),
        cluster_id = c(rep("a", times = 100), rep("b", times = 100))
    )

# upsample using euclidean distance (the documented default, so
# distance_function is not specified here)
tof_upsample_neighbor(
    tof_tibble = sim_data,
    reference_tibble = reference_data,
    reference_cluster_col = cluster_id
)

# upsample using cosine distance by setting distance_function explicitly
tof_upsample_neighbor(
    tof_tibble = sim_data,
    reference_tibble = reference_data,
    reference_cluster_col = cluster_id,
    distance_function = "cosine"
)

}
