% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/discrete_discover.R
\name{discrete_discover}
\alias{discrete_discover}
\title{Unsupervised meta-analytical discovery and validation of discrete clustering 
structures in microbial abundance data}
\usage{
discrete_discover(D, batch, data, control)
}
\arguments{
\item{D}{sample-by-sample dissimilarity measurements. Should be provided as 
a \code{\link[stats]{dist}} object.}

\item{batch}{name of the batch variable. This variable in \code{data} should
be a factor, and will be converted to one with a warning if it is not.}

\item{data}{data frame of metadata. Its columns must include the variable
named by \code{batch}.}

\item{control}{a named list of additional control parameters. See details.}
}
\value{
a list, with the following components:
\describe{
\item{internal_mean, internal_se}{
matrices of internal clustering structure evaluation measurements 
(prediction strengths). Columns and rows correspond to different batches and
different numbers of clusters, respectively. \code{internal_mean} and
\code{internal_se}, as the names suggest, are the mean and standard error of
prediction strengths for each batch/cluster number.
}
\item{external_mean, external_se}{
same structure as \code{internal_mean} and \code{internal_se}, but records
external clustering structure evaluation measurements (generalized prediction
strength).
}
\item{control}{list of additional control parameters used in the function
call.
}
}
}
\description{
\code{discrete_discover} takes as input sample-by-sample dissimilarity 
measurements (generated from microbial abundance profiles), and performs 
unsupervised clustering within each batch across a range of cluster numbers. 
It then evaluates the support for each cluster number with both internal 
(i.e., samples within the batch) and external (i.e., samples in other 
batches) data. Internal evaluation is realized with 
\code{\link[fpc]{prediction.strength}} and external evaluation is based on
a generalized version of the same method. \code{discrete_discover} generates 
as output the evaluation statistics for each cluster number. A cluster number
with good support from both internal and external evaluations provides 
meta-analytical evidence for discrete structures in the microbial abundance 
profiles.
}
\details{
\code{control} should be provided as a named list of the following components
(can be a subset).
\describe{

\item{k_max}{
integer. Maximum number of clusters to evaluate. \code{discrete_discover} 
will evaluate clustering structures corresponding to cluster numbers ranging
from 2 to \code{k_max}. Default to 10.
}
\item{cluster_function}{
an interface function. This function will be used for unsupervised clustering
for discrete structure evaluation. This corresponds to the 
\code{clustermethod} parameter in 
\code{\link[fpc]{prediction.strength}}, and similarly, should also follow the
specifications as detailed in \code{\link[fpc]{clusterboot}}. Default to
\code{\link[fpc:kmeansCBI]{claraCBI}}.
}
\item{classify_method}{
character. Classification method used to assign observations in the method's
internal and external evaluation stages. Corresponds to the 
\code{classification} parameter in \code{\link[fpc]{prediction.strength}}, 
and can only be either \code{"centroid"} or \code{"knn"}. Default to 
\code{"centroid"}.
}
\item{M}{
integer. Number of random iterations to partition the batch during the method's 
internal evaluation. Corresponds to the \code{M} parameter in 
\code{\link[fpc]{prediction.strength}}. Default to 30.
}
\item{nnk}{
integer. Number of nearest neighbors if \code{classify_method="knn"}. 
Corresponds to the \code{nnk} parameter in 
\code{\link[fpc]{prediction.strength}}. Default to 1.
}
\item{diagnostic_plot}{
character. Name for the generated diagnostic figure file. Default to 
\code{"discrete_diagnostic.pdf"}. Can be set to \code{NULL} in which 
case no output will be generated.}
\item{verbose}{
logical. Indicates whether or not verbose information will be printed.
}
}
}
\examples{
data("CRC_abd", "CRC_meta")
# Calculate Bray-Curtis dissimilarity between the samples
library(vegan)
D <- vegdist(t(CRC_abd))
fit_discrete <- discrete_discover(D = D,
                                  batch = "studyID",
                                  data = CRC_meta)
}
\author{
Siyuan Ma, \email{siyuanma@g.harvard.edu}
}
