% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/computeVMDs.R
\name{filterVMDs}
\alias{filterVMDs}
\title{Filter VMDs}
\usage{
filterVMDs(
  methylationData,
  potentialVMDs,
  context = "CG",
  minCytosinesCount = 4,
  minReadsPerCytosine = 3,
  sdCutoffMethod = "per.high",
  percentage = 0.05,
  parallel = FALSE,
  BPPARAM = NULL,
  cores = NULL
)
}
\arguments{
\item{methylationData}{the methylation data for the condition to be analysed
(see \code{\link{ontSampleGRangesList}}).}

\item{potentialVMDs}{a \code{\link{GRanges}} object with the potential VMDs 
(candidate regions) in which to compute the VMDs. This can be a list of gene 
and/or transposable element coordinates.}

\item{context}{the context in which the VMDs are computed (\code{"CG"}, 
\code{"CHG"} or \code{"CHH"}).}

\item{minCytosinesCount}{VMDs with fewer cytosines in the specified context 
than \code{minCytosinesCount} will be discarded.}

\item{minReadsPerCytosine}{VMDs whose average number of reads per cytosine is 
lower than \code{minReadsPerCytosine} will be discarded.}

\item{sdCutoffMethod}{Character string specifying how to determine the cutoff
for filtering VMDs based on their methylation variance (weighted standard deviation).
Available options are:
\describe{
  \item{\code{"per.high"}}{Selects the top \code{percentage} of regions with the highest variance (standard deviation).}
  \item{\code{"per.low"}}{Selects the bottom \code{percentage} of regions with the lowest variance.}
  \item{\code{"EDE.high"}}{Uses the elbow point (inflection/knee) from the descendingly sorted variance values to determine a data-driven high-variance cutoff. Retains regions with SD above this elbow point.}
  \item{\code{"EDE.low"}}{Uses the elbow point from the ascendingly sorted variance values to define a low-variance cutoff. Retains regions with SD below this point.}
}
This allows either quantile-based filtering or automatic detection of variance thresholds based on distribution shape.}

\item{percentage}{Numeric cutoff used when \code{sdCutoffMethod} is set to
\code{"per.high"} or \code{"per.low"}. Represents the quantile threshold:
for example, \code{percentage = 0.05} keeps the top 5\% (\code{"per.high"})
or the bottom 5\% (\code{"per.low"}) of regions, ranked by weighted standard
deviation.}

\item{parallel}{Logical; run in parallel if \code{TRUE}.}

\item{BPPARAM}{A \code{BiocParallelParam} object controlling parallel execution.
This value is set automatically when \code{parallel} is \code{TRUE}, but it can
also be set manually.}

\item{cores}{Integer number of workers (must not exceed \code{BPPARAM$workers}).
This value is set automatically to the maximum number of system workers,
but it can also be set manually.}
}
\value{
a \code{\link{GRanges}} object with 9 metadata columns that contain 
the VMDs; see \code{\link{computeVMDs}}.
}
\description{
This function verifies whether each region in a set of potential VMDs 
(e.g. genes, transposons, CpG islands) is variance methylated or not.
}
\examples{
# load the ONT methylation data 
data(ontSampleGRangesList)
# load the gene annotation data
data(GEs_hg38)

# select the transcript
transcript <- GEs_hg38[which(GEs_hg38$type == "transcript")]

# the regions where to compute the VMDs
regions <- GRanges(seqnames = Rle("chr1"), ranges = IRanges(1E6+5E5,2E6))
transcript <- transcript[overlapsAny(transcript, regions)]

# filter transcripts that are variance methylated in the GM18501 sample
VMDsGenesCG <- filterVMDs(ontSampleGRangesList[["GM18501"]], 
               potentialVMDs = transcript, 
               context = "CG", sdCutoffMethod = "per.high", percentage = 0.05,
               minCytosinesCount = 4, minReadsPerCytosine = 3, cores = 1)

}
\seealso{
\code{\link{computeVMDs}} 
and \code{\link{analyseReadsInsideRegionsForCondition}}
}
\author{
Nicolae Radu Zabet and Young Jun Kim
}
