% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cluster_profiles_mle.R
\name{cluster_profiles_mle}
\alias{cluster_profiles_mle}
\alias{cluster_profile_mle}
\alias{cluster_mle}
\title{Cluster methylation profiles using EM}
\usage{
cluster_profiles_mle(
  X,
  K = 3,
  model = NULL,
  basis = NULL,
  H = NULL,
  pi_k = NULL,
  lambda = 0.5,
  beta_dispersion = 5,
  gaussian_sigma = rep(0.2, K),
  w = NULL,
  em_max_iter = 50,
  epsilon_conv = 1e-04,
  opt_method = "CG",
  opt_itnmax = 50,
  init_opt_itnmax = 30,
  is_parallel = FALSE,
  no_cores = NULL,
  is_verbose = FALSE,
  ...
)
}
\arguments{
\item{X}{The input data, which has to be a \code{\link[base]{list}} of
length N, where each element is an \code{L X C} matrix and L
is the total number of observations. The first column contains the input
observations x (i.e. CpG locations). If
"binomial" model then C=3, and 2nd and 3rd columns contain total number of
trials and number of successes respectively. If "bernoulli" or "gaussian"
model, then C=2 containing the output y (e.g. methylation level). If "beta"
model, then C=3, where 2nd column contains output y and 3rd column the
dispersion parameter.}

\item{K}{Integer denoting the total number of clusters K.}

\item{model}{Observation model name as character string. It can be either
'bernoulli', 'binomial', 'beta' or 'gaussian'.}

\item{basis}{A 'basis' object. E.g. see \code{\link{create_basis}}. If NULL,
an RBF object will be created.}

\item{H}{Optional, design matrix of the input data X. If NULL, H will be
computed inside the function.}

\item{pi_k}{Vector of length K, denoting the mixing proportions.}

\item{lambda}{The complexity penalty coefficient for ridge regression.}

\item{beta_dispersion}{Dispersion parameter, only used for Beta distribution
and will be the same for all observations.}

\item{gaussian_sigma}{Initial standard deviation of the noise term, only used
when having "gaussian" observation model.}

\item{w}{Optional, an (M+1)xK matrix of the initial parameters, where each
column consists of the basis function coefficients for each corresponding
cluster k. If NULL, will be assigned with default values.}

\item{em_max_iter}{Integer denoting the maximum number of EM iterations.}

\item{epsilon_conv}{Numeric denoting the convergence threshold for EM.}

\item{opt_method}{The optimization method to be used. See
\code{\link[stats]{optim}} for possible methods. Default is "CG".}

\item{opt_itnmax}{Optional argument giving the maximum number of iterations
for the corresponding method. See \code{\link[stats]{optim}} for details.}

\item{init_opt_itnmax}{Optimization iterations for obtaining the initial EM
parameter values.}

\item{is_parallel}{Logical, indicating if code should be run in parallel.}

\item{no_cores}{Number of cores to be used, default is max_no_cores - 1.}

\item{is_verbose}{Logical, print results during EM iterations.}

\item{...}{Additional parameters.}
}
\value{
An object of class \code{cluster_profiles_mle_}"obs_model" with the
  following elements: \itemize{ \item{ \code{W}: An (M+1) X K matrix with the
  optimized parameter values for each cluster. Each column of the matrix
  corresponds to a different cluster k. M is the number of basis functions.}
  \item{ \code{pi_k}: Mixing proportions. } \item{ \code{r_nk}: An (N X K)
  responsibility matrix of each observation being explained by a specific
  cluster. }  \item{ \code{basis}: The basis object. } \item{\code{nll}: The
  negative log likelihood vector.} \item{\code{labels}: Cluster assignment
  labels.} \item{\code{bic}: Bayesian Information Criterion metric.}
  \item{\code{aic}: Akaike Information Criterion metric.} \item{\code{icl}:
  Integrated Complete Likelihood criterion metric.}
  \item{\code{gaussian_sigma}: Optimized standard deviation for gaussian
  observation model.} }
}
\description{
General purpose functions for clustering latent profiles for
  different observation models using maximum likelihood estimation (MLE) and
  the EM algorithm. Initially, it performs parameter checking, and
  initializes main parameters, such as mixing proportions, basis function
  coefficients, then the EM algorithm is applied and finally model selection
  metrics are calculated, such as BIC and AIC.
}
\section{Details}{
 The beta regression model is based on an alternative
  parameterization of the beta density in terms of the mean and dispersion
  parameter: \url{https://cran.r-project.org/web/packages/betareg/}. For
  modelling details of the Binomial/Bernoulli observation model, see the
  BPRMeth paper:
  \url{https://academic.oup.com/bioinformatics/article/32/17/i405/2450762}.
}

\examples{
# Example of optimizing parameters for synthetic data using 3 RBFs

basis <- create_rbf_object(M=3)
out <- cluster_profiles_mle(X = binomial_data, model = "binomial",
  basis=basis, em_max_iter = 5, opt_itnmax = 5, init_opt_itnmax=5,
  is_parallel = FALSE)

#-------------------------------------

basis <- create_rbf_object(M=3)
out <- cluster_profiles_mle(X = gaussian_data, model = "gaussian",
  basis=basis, em_max_iter = 5, opt_itnmax = 5, init_opt_itnmax=5,
  is_parallel = FALSE)

}
\seealso{
\code{\link{create_basis}}, \code{\link{cluster_profiles_vb}},
  \code{\link{infer_profiles_vb}}, \code{\link{infer_profiles_mle}},
  \code{\link{infer_profiles_gibbs}}, \code{\link{create_region_object}}
}
\author{
C.A.Kapourani \email{C.A.Kapourani@ed.ac.uk}
}
