% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/step7ExpressionVsDNAMethylationScatterplots.R
\name{step7ExpressionVsDNAMethylationScatterplots}
\alias{step7ExpressionVsDNAMethylationScatterplots}
\title{Create scatterplots displaying the expression of the top genes and the
methylation levels of each of their linked RE DNA methylation sites,
optionally incorporating copy number variation, somatic mutation, and purity
data}
\usage{
step7ExpressionVsDNAMethylationScatterplots(
  TENETMultiAssayExperiment,
  geneAnnotationDataset = NA,
  hypermethGplusAnalysis = TRUE,
  hypomethGplusAnalysis = TRUE,
  topGeneNumber = 10,
  DNAMethylationSites = NA,
  simpleOrComplex = "simple",
  CNVData = NA,
  SMData = NA,
  purityData = NA,
  coreCount = 1
)
}
\arguments{
\item{TENETMultiAssayExperiment}{Specify a MultiAssayExperiment object
containing expression and methylation SummarizedExperiment objects, such as
one created by the \code{TCGADownloader} function. The object's metadata
must contain the results from the \code{step5OptimizeLinks} and
\code{step6DNAMethylationSitesPerGeneTabulation} functions.}

\item{geneAnnotationDataset}{Specify a gene annotation dataset which is
used to identify names for genes by their Ensembl IDs. The argument must be
either a GRanges object (such as one imported via \code{rtracklayer::import}) or a
path to a GFF3 or GTF file. Both GENCODE and Ensembl annotations are
supported. Other annotation datasets may work, but have not been tested.
See the "Input data" section of the vignette for information on the required
dataset format.
Specify NA to use the gene names listed in the "geneName" column of the
elementMetadata of the rowRanges of the "expression" SummarizedExperiment
object within the TENETMultiAssayExperiment object. Defaults to NA.}

\item{hypermethGplusAnalysis}{Set to TRUE to create scatterplots for genes
with hypermethylated RE DNA methylation sites with G+ links and each of their
linked RE DNA methylation sites. Defaults to TRUE.}

\item{hypomethGplusAnalysis}{Set to TRUE to create scatterplots for genes
with hypomethylated RE DNA methylation sites with G+ links and each of their
linked RE DNA methylation sites. Defaults to TRUE.}

\item{topGeneNumber}{Specify the number of top genes and TFs, based on the
most linked RE DNA methylation sites of a given analysis type, for which to
create scatterplots. Defaults to 10.}

\item{DNAMethylationSites}{Supply a vector of RE DNA methylation site IDs for
which scatterplots will be generated, if these sites have any linked
genes/TFs with expression in each specified analysis type. Defaults to NA.}

\item{simpleOrComplex}{Set to 'complex' to incorporate copy number variation,
somatic mutation, and purity data into the scatterplots. Otherwise, set to
'simple'. If set to 'complex', copy number variation, somatic mutation, and
purity data must be provided via the \code{CNVData}, \code{SMData}, and \code{purityData}
arguments respectively. \strong{Note:} At this time, either all or none of these
optional data types must be provided. Defaults to 'simple'.}

\item{CNVData}{Specify a dataset containing CNV status for each of the top
genes, as selected by the analysis type and \code{topGeneNumber} arguments, in
each sample in the TENETMultiAssayExperiment. CNV status must be an
integer representing the change in copy number for each gene, with negative
numbers representing a loss and positive numbers representing a gain.
\strong{Note:} Copy number changes of 2 or more will be grouped together. The
dataset may be given as a data frame, matrix, or TSV file path. If it is a
data frame or matrix, its rownames must contain sample names. If a TSV file
is provided, the first column must contain sample names, and the first row
must contain column headers. Sample names must match those in the colData of
the TENETMultiAssayExperiment object. Column names must contain gene IDs
followed by "_CNV". If set to NA, the data will be loaded from the colData of
the TENETMultiAssayExperiment object. \strong{Note:} If data are missing for a
given gene, the plot will be generated without considering its CNV status.
Defaults to NA, and is only considered if \code{simpleOrComplex} is set to
"complex".}

\item{SMData}{Specify a dataset containing the somatic mutation status for
each of the top genes in each sample in the TENETMultiAssayExperiment. This
argument behaves the same way as the \code{CNVData} argument, except that the
names of the columns containing SM status must end with "_SM", and the status
must be an integer 0 or 1 or a string "no mutation" or "mutation". Defaults
to NA, and is only considered if \code{simpleOrComplex} is set to "complex".}

\item{purityData}{Specify the cellularity/purity data for each sample in the
TENETMultiAssayExperiment. Purity values must range from 0 to 1. The dataset
may be given as a vector, data frame, matrix, or TSV file path. If a vector
is given, the names of the vector elements must correspond to the names of
the samples in the rownames of the colData of the TENETMultiAssayExperiment
object. If no names are provided for the vector, then the number of elements
in the vector must equal the number of samples in the colData, and it is
assumed to align with the samples as they are ordered in the colData. If a
data frame, matrix, or TSV file is given, it must be in the same format as
for the \code{CNVData} argument, except that the first column of data (excluding
the rownames) must contain the purity data. If this argument is set to NA,
purity data will be loaded from the "purity" column of the colData of the
TENETMultiAssayExperiment object. Defaults to NA, and is only considered if
\code{simpleOrComplex} is set to "complex".}

\item{coreCount}{Argument passed as the \code{mc.cores} argument to
\code{mcmapply}. See \code{?parallel::mcmapply} for more details. Defaults
to 1.}
}
\value{
Returns the MultiAssayExperiment object given as the
TENETMultiAssayExperiment argument with an additional list
named 'step7ExpressionVsDNAMethylationScatterplots' in its metadata with the
output of this function. This list is subdivided into hypermethGplus or
hypomethGplus results as selected by the user, which are further subdivided
into lists with data for the top overall genes, and for top TF genes only.
Each of these lists contains a final list for each of the top genes/TFs
containing scatterplots for each RE DNA methylation site linked to the gene.
If the user has specified RE DNA methylation sites of interest, an additional
list named 'selectedDNAMethylationSites' is generated for each quadrant
containing scatterplots for each gene linked to each specified RE DNA
methylation site. In each scatterplot, the expression of the gene is plotted
on the X-axis, and the methylation of the linked RE DNA methylation site is
plotted on the Y-axis. If complex plots are being created, the CNV and SM
status of each sample, if present, will be represented by each point's shape
(with SM status taking precedence over CNV), and the purity of each sample
will be reflected in each point's size.
}
\description{
This function takes the top genes and transcription factors by number of
linked RE DNA methylation sites identified by the
\code{step6DNAMethylationSitesPerGeneTabulation} function up to a number
specified by the user, or all genes linked to selected RE DNA methylation
sites specified by the user, and generates scatterplots displaying
the expression level of each of these genes on the X-axis and the
methylation level of each RE DNA methylation site linked to them on the
Y-axis for the hyper- and/or hypomethylated G+ analysis quadrants.
The scatterplots may optionally incorporate provided copy number variation
(CNV), somatic mutation (SM), and purity information for each sample.
}
\examples{
\dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
## This example uses the example MultiAssayExperiment provided in the
## TENET.ExperimentHub package to create scatterplots for the top 10
## genes and TFs by number of linked hyper- and hypomethylated RE DNA
## methylation sites, showing expression of these genes and the DNA
## methylation level of their linked RE DNA methylation sites. Gene names
## will be retrieved from the rowRanges of the 'expression'
## SummarizedExperiment object in the example MultiAssayExperiment. No CNV,
## SM, or purity data will be incorporated, and the analysis will be
## performed using one CPU core.

## Load the example TENET MultiAssayExperiment object
## from the TENET.ExperimentHub package
exampleTENETMultiAssayExperiment <-
    TENET.ExperimentHub::exampleTENETMultiAssayExperiment()

## Use the example dataset to create the scatterplots
returnValue <- step7ExpressionVsDNAMethylationScatterplots(
    TENETMultiAssayExperiment = exampleTENETMultiAssayExperiment
)

## This example demonstrates many of the analysis options, creating
## scatterplots for the top 5 genes and TFs as well as some example RE DNA
## methylation sites of interest, for the hypomethylated G+ analysis
## quadrant only (hypermethGplusAnalysis is set to FALSE). As before, gene
## names will be retrieved from the rowRanges of the 'expression'
## SummarizedExperiment object.
## Complex scatterplots are created which display each sample's CNV and SM
## status for each gene, as well as purity data, where available. The CNV,
## SM, and purity data will be taken from specific columns of the
## exampleTENETClinicalDataFrame object. The analysis will be performed using
## 8 CPU cores.

## Load the example TENET MultiAssayExperiment object
## from the TENET.ExperimentHub package
exampleTENETMultiAssayExperiment <-
    TENET.ExperimentHub::exampleTENETMultiAssayExperiment()

## Load the data frame with example clinical data for patients in the TENET
## MultiAssayExperiment object from the TENET.ExperimentHub package
exampleTENETClinicalDataFrame <-
    TENET.ExperimentHub::exampleTENETClinicalDataFrame()

## Use the example datasets to create the scatterplots
returnValue <- step7ExpressionVsDNAMethylationScatterplots(
    TENETMultiAssayExperiment = exampleTENETMultiAssayExperiment,
    hypermethGplusAnalysis = FALSE,
    topGeneNumber = 5,
    DNAMethylationSites = c("cg03095778", "cg24011501", "cg12989041"),
    simpleOrComplex = "complex",
    CNVData = exampleTENETClinicalDataFrame[seq(4, 42, by = 2)],
    SMData = exampleTENETClinicalDataFrame[seq(5, 43, by = 2)],
    purityData = exampleTENETClinicalDataFrame[3],
    coreCount = 8
)
\dontshow{\}) # examplesIf}
}
