% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/hausdorffDistPlot.R
\name{hausdorffDistPlot}
\alias{hausdorffDistPlot}
\title{Create diagnostic plot of Hausdorff distances}
\usage{
hausdorffDistPlot(
  mat,
  Nvec,
  Nrep = 5,
  q = 1e-04,
  methods = c("geosketch", "scsampler", "uniform"),
  extraArgs = list()
)
}
\arguments{
\item{mat}{m x n matrix. Samples (the dimension along which to subsample)
should be in the rows, features in the columns.}

\item{Nvec}{Numeric vector of sketch sizes.}

\item{Nrep}{Numeric scalar indicating the number of sketches to draw
for each sketch size.}

\item{q}{Numeric scalar in [0, 1], indicating the fraction of largest
minimum distances to discard when calculating the robust Hausdorff
distance. Setting \code{q = 0} gives the classical Hausdorff distance.
The default is \code{1e-4}, as suggested by Hie et al. (2019).}

\item{methods}{Character vector, indicating which method(s) to include
in the plot. Should be a subset of
\code{c("geosketch", "scsampler", "uniform")}, where \code{"uniform"}
randomly samples rows of \code{mat} with uniform probabilities.}

\item{extraArgs}{Named list providing extra arguments to the respective
methods (beyond the matrix and the sketch size). The names of the list
should be the method names (currently, "geosketch" or "scsampler"),
and each list element should be a named list of argument values. See
the examples for an illustration of how to use this argument. Note that
the \code{seed} argument, if provided to any of the methods,
will be ignored (since it would imply providing the same seed for each
repeated run of the sketching).}
}
\value{
A \code{ggplot} object.
}
\description{
Create diagnostic plot showing the Hausdorff distance between a sketch
and the full data set, for varying sketch sizes. For reproducibility,
seed the random number generator before calling this function using
\code{set.seed}.
}
\examples{
## Generate example data matrix
mat <- matrix(rnorm(1000), nrow = 100)

## Generate diagnostic Hausdorff distance plot
hausdorffDistPlot(mat, Nvec = c(10, 25, 50),
                  methods = c("geosketch", "uniform"))

## Provide additional arguments for geosketch
hausdorffDistPlot(mat, Nvec = c(10, 25, 50), Nrep = 2,
                  methods = c("geosketch", "uniform"),
                  extraArgs = list(geosketch = list(max_iter = 100)))

}
\references{
Hie et al. (2019): Geometric sketching compactly summarizes the
single-cell transcriptomic landscape. Cell Systems 8, 483--493.

Song et al. (2022): scSampler: fast diversity-preserving subsampling of
large-scale single-cell transcriptomic data.
bioRxiv \doi{10.1101/2022.01.15.476407}

Huttenlocher et al. (1993): Comparing images using the Hausdorff
distance. IEEE Transactions on Pattern Analysis and Machine
Intelligence 15(9), 850--863.
}
\author{
Charlotte Soneson, Michael Stadler
}
