% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/step7TopGenesSurvival.R
\name{step7TopGenesSurvival}
\alias{step7TopGenesSurvival}
\title{Perform Kaplan-Meier and Cox regression analyses to assess the association of
patient survival with the expression of top genes and transcription factors
and methylation of their linked RE DNA methylation sites}
\usage{
step7TopGenesSurvival(
  TENETMultiAssayExperiment,
  geneAnnotationDataset = NA,
  hypermethGplusAnalysis = TRUE,
  hypomethGplusAnalysis = TRUE,
  topGeneNumber = 10,
  vitalStatusData = NA,
  survivalTimeData = NA,
  highProportion = 0.5,
  lowProportion = 0.5,
  survivalGroupingCutoffs = NA,
  jenksBreaksGroupCount = NA,
  generatePlots = TRUE,
  coreCount = 1
)
}
\arguments{
\item{TENETMultiAssayExperiment}{Specify a MultiAssayExperiment object
containing expression and methylation SummarizedExperiment objects, such as
one created by the TCGADownloader function. The object's metadata must
contain the results from the \code{step2GetDifferentiallyMethylatedSites},
\code{step5OptimizeLinks}, and \code{step6DNAMethylationSitesPerGeneTabulation}
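functions. Unless survival data are supplied separately via the
\code{vitalStatusData} and \code{survivalTimeData} arguments, the object's
colData must contain 'vital_status' and 'time' columns containing data on the
patients' survival status and time to event/censorship, respectively.}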

\item{geneAnnotationDataset}{Specify a gene annotation dataset which is
used to identify names for genes by their Ensembl IDs. The argument must be
either a GRanges object (such as one imported via \code{rtracklayer::import}) or a
path to a GFF3 or GTF file. Both GENCODE and Ensembl annotations are
supported. Other annotation datasets may work, but have not been tested.
See the "Input data" section of the vignette for information on the required
dataset format.
Specify NA to use the gene names listed in the "geneName" column of the
elementMetadata of the rowRanges of the "expression" SummarizedExperiment
object within the TENETMultiAssayExperiment object. Defaults to NA.}

\item{hypermethGplusAnalysis}{Set to TRUE to perform survival analyses on the
top genes and TFs by most hypermethylated RE DNA methylation sites with G+
links, as well as their linked RE DNA methylation sites. Defaults to TRUE.}

\item{hypomethGplusAnalysis}{Set to TRUE to perform survival analyses on the
top genes and TFs by most hypomethylated RE DNA methylation sites with G+
links, as well as their linked RE DNA methylation sites. Defaults to TRUE.}

\item{topGeneNumber}{Specify the number of top genes and TFs, based on the
most linked RE DNA methylation sites of a given analysis type, for which to
perform survival analyses. Defaults to 10.}

\item{vitalStatusData}{Specify the patient vital status data for samples in
the TENETMultiAssayExperiment. Vital status should be given in the form of
either "alive" or "dead" (case-insensitive), or 1 or 2, indicating that the
sample was collected from a patient who was alive/censored or dead/reached
the outcome of interest, respectively. These data can be given as a vector,
data frame, matrix, or path to a TSV file. Given sample names must match
the names of the samples in the colData of the TENETMultiAssayExperiment. If
a vector is given, the names of its elements must be the sample names; if it
has no names, its length must equal the number of samples in the colData, and
its values must be in the same order as the samples in the colData. If a data
frame or matrix is given, its rownames must contain the sample names, and its
first column must contain the vital status. If a TSV file is given, its first
column must contain the sample names, its second column must contain the
vital status, and its first row must contain column names. If set to NA,
vital status data will be retrieved from the "vital_status" column of the
colData of the TENETMultiAssayExperiment. Defaults to NA.}

\item{survivalTimeData}{Specify the numeric survival time data for samples in
the TENETMultiAssayExperiment. These data can be given as a vector, data
frame, matrix, or path to a TSV file; see the documentation for
\code{vitalStatusData} for more information. If set to NA, survival time data will
be retrieved from the "time" column of the colData of the
TENETMultiAssayExperiment. Defaults to NA.}

\item{highProportion}{Specify the proportion of all samples to include in the
high expression/methylation group for Kaplan-Meier survival analyses as a
number ranging from 0 to 1. \strong{Note:} If the \code{survivalGroupingCutoffs} or
\code{jenksBreaksGroupCount} argument is specified, this argument will be
ignored. Defaults to 0.5.}

\item{lowProportion}{Specify the proportion of all samples to include in the
low expression/methylation group for Kaplan-Meier survival analyses as a
number ranging from 0 to 1. \strong{Note:} If the \code{survivalGroupingCutoffs} or
\code{jenksBreaksGroupCount} argument is specified, this argument will be
ignored. If both \code{lowProportion} and \code{highProportion} are set to 0.5, samples
at exactly the 50th percentile will be assigned to the "Low" group. Defaults
to 0.5.}

\item{survivalGroupingCutoffs}{To use custom sample grouping, specify a data
frame or matrix with two columns and \emph{n} rows, where \emph{n} is the number of
groups the samples should be broken into. Each row defines one group as a
range of sample proportions, with values ranging from 0 to 1. Values in the
first column should reflect the minimum proportion, and values in the second
column should reflect the maximum proportion (non-inclusive if not 1). For
example, four rows containing 0 and 0.25, 0.25 and 0.5, 0.5 and 0.75, and
0.75 and 1 will split the samples into quartiles. If the
object has row names, they will be used to name the groups. If specified, the
\code{highProportion} and \code{lowProportion} arguments will be ignored. Defaults to
NA.}

\item{jenksBreaksGroupCount}{Specify the number of groups into which to break
the survival data as a positive integer. Cutoffs for each group will be
generated using Jenks natural breaks optimization. If specified, the
\code{highProportion} and \code{lowProportion} arguments will be ignored. Defaults to
NA.}

\item{generatePlots}{Set to TRUE to generate plots displaying the
Kaplan-Meier survival results for the top genes and TFs of interest and their
linked RE DNA methylation sites. Defaults to TRUE.}

\item{coreCount}{Specify the number of CPU cores to use for the analysis.
This value is passed as the \code{mc.cores} argument to
\code{parallel::mclapply}. See \code{?parallel::mclapply} for more details.
Defaults to 1.}
}
\value{
Returns the MultiAssayExperiment object given as the
TENETMultiAssayExperiment argument with an additional list
named 'step7TopGenesSurvival' in its metadata containing the output of this
function. This list contains \code{hypermethGplus} and/or \code{hypomethGplus} lists,
as selected by the user, which contain lists for the top overall genes and
top TF genes. Each contains a list of data frames containing survival
statistics for the top genes/TFs and their linked RE DNA methylation sites
from both Kaplan-Meier and Cox regression analyses, and a list of
Kaplan-Meier plots if \code{generatePlots} is TRUE.
}
\description{
This function takes the top genes and transcription factors (TFs) by number
of linked RE DNA methylation sites identified by the
\code{step6DNAMethylationSitesPerGeneTabulation} function, up to the number
specified by the user, along with patient survival data, and generates plots
and tables with statistics assessing the association of patient survival with
the expression of top genes and transcription factors and methylation of
their linked RE DNA methylation sites, using groupings based on percentile
cutoffs or Jenks natural breaks for Kaplan-Meier analyses.
}
\examples{
\dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
## Example 1: survival analysis using the default settings.
##
## This example uses the example MultiAssayExperiment provided in the
## TENET.ExperimentHub package to perform Kaplan-Meier and Cox regression
## survival analyses on the top 10 genes and TFs by number of linked hyper-
## and hypomethylated RE DNA methylation sites, and on all unique RE DNA
## methylation sites linked to those genes. The vital status and
## survival time of patients will be taken from the "vital_status" and "time"
## columns of the colData of the example MultiAssayExperiment. Gene names
## will be retrieved from the rowRanges of the "expression"
## SummarizedExperiment object in the example MultiAssayExperiment. In the
## Kaplan-Meier analyses, the patient samples with complete clinical
## information in the highest half of expression/methylation will be compared
## with those in the lowest half, and plots will be generated. The analysis
## will be performed using one CPU core.

## Load the example TENET MultiAssayExperiment object
## from the TENET.ExperimentHub package
exampleTENETMultiAssayExperiment <-
    TENET.ExperimentHub::exampleTENETMultiAssayExperiment()

## Use the example dataset to perform the survival analysis. Only the
## TENETMultiAssayExperiment argument is given, so every other argument keeps
## its default value. The results are returned in a list named
## "step7TopGenesSurvival" in the metadata of the returned object.
returnValue <- step7TopGenesSurvival(
    TENETMultiAssayExperiment = exampleTENETMultiAssayExperiment
)

## Example 2: hypomethylated analysis only, with separately supplied
## survival data and quartile-based grouping.
##
## This example uses the example MultiAssayExperiment provided in the
## TENET.ExperimentHub package to perform Kaplan-Meier and Cox regression
## survival analyses on only the top 5 genes and TFs by number of linked
## hypomethylated RE DNA methylation sites, and on all unique
## RE DNA methylation sites linked to those genes. The vital
## status and survival time of patients will be supplied as vectors taken
## from the "vital_status" and "time" columns of a data frame with example
## patient data from the TENET.ExperimentHub package. Gene names
## will be retrieved from the rowRanges of the "expression"
## SummarizedExperiment object in the example MultiAssayExperiment. In the
## Kaplan-Meier analyses, the patient samples with complete clinical
## information in the highest quartile of expression/methylation will be
## compared with those in the lowest quartile, and plots will not be
## generated. The analysis will be performed using 8 CPU cores.

## Load the example TENET MultiAssayExperiment object
## from the TENET.ExperimentHub package
exampleTENETMultiAssayExperiment <-
    TENET.ExperimentHub::exampleTENETMultiAssayExperiment()

## Load the example clinical data frame from the TENET.ExperimentHub
## package
exampleTENETClinicalDataFrame <-
    TENET.ExperimentHub::exampleTENETClinicalDataFrame()

## Use the example datasets to perform the survival analysis. The
## vitalStatusData and survivalTimeData arguments are given as single columns
## extracted from the clinical data frame, i.e. as vectors. Vectors without
## names must have one value per sample in the colData, in the same order as
## the samples in the colData. Setting highProportion and lowProportion to
## 0.25 compares the highest and lowest quartiles.
returnValue <- step7TopGenesSurvival(
    TENETMultiAssayExperiment = exampleTENETMultiAssayExperiment,
    hypermethGplusAnalysis = FALSE,
    topGeneNumber = 5,
    vitalStatusData = exampleTENETClinicalDataFrame$vital_status,
    survivalTimeData = exampleTENETClinicalDataFrame$time,
    highProportion = 0.25,
    lowProportion = 0.25,
    generatePlots = FALSE,
    coreCount = 8
)

## Example 3: custom sample grouping using survivalGroupingCutoffs.
##
## This example uses the example MultiAssayExperiment provided in the
## TENET.ExperimentHub package to perform Kaplan-Meier and Cox regression
## survival analyses on the top 10 genes and TFs by number of linked hyper-
## and hypomethylated RE DNA methylation sites, and on all unique RE DNA
## methylation sites linked to those genes. The vital status and
## survival time of patients will be taken from the "vital_status" and "time"
## columns of the colData of the example MultiAssayExperiment. Gene names
## will be retrieved from the rowRanges of the "expression"
## SummarizedExperiment object in the example MultiAssayExperiment. In the
## Kaplan-Meier analyses, custom group cutoffs representing quartiles will be
## used, and plots will be generated. The analysis will be performed using
## one CPU core.

## Load the example TENET MultiAssayExperiment object
## from the TENET.ExperimentHub package
exampleTENETMultiAssayExperiment <-
    TENET.ExperimentHub::exampleTENETMultiAssayExperiment()

## Create an example cutoff data frame which will split the samples into
## quartiles. Each row defines one group: the first column ("Low") holds the
## minimum proportion for the group and the second column ("High") holds the
## maximum proportion. The row names assigned below define custom names for
## the resulting groups.
cutoffMatrix <- data.frame(
    "Low" = c(0, (1 / 4), (1 / 2), (3 / 4)),
    "High" = c((1 / 4), (1 / 2), (3 / 4), 1)
)
rownames(cutoffMatrix) <- c(
    "GroupOne",
    "GroupTwo",
    "GroupThree",
    "GroupFour"
)

## Use the example dataset and cutoffMatrix to perform the survival analysis.
## Because survivalGroupingCutoffs is specified, the highProportion and
## lowProportion arguments are ignored.
returnValue <- step7TopGenesSurvival(
    TENETMultiAssayExperiment = exampleTENETMultiAssayExperiment,
    survivalGroupingCutoffs = cutoffMatrix
)

## Example 4: sample grouping using Jenks natural breaks optimization.
##
## This example uses the example MultiAssayExperiment provided in the
## TENET.ExperimentHub package to perform Kaplan-Meier and Cox regression
## survival analyses on the top 10 genes and TFs by number of linked hyper-
## and hypomethylated RE DNA methylation sites, and on all unique RE DNA
## methylation sites linked to those genes. The vital status and
## survival time of patients will be taken from the "vital_status" and "time"
## columns of the colData of the example MultiAssayExperiment. Gene names
## will be retrieved from the rowRanges of the "expression"
## SummarizedExperiment object in the example MultiAssayExperiment. In the
## Kaplan-Meier analyses, the samples will be divided into 3 groups using
## Jenks natural breaks optimization, and plots will be generated. The
## analysis will be performed using one CPU core.

## Load the example TENET MultiAssayExperiment object
## from the TENET.ExperimentHub package
exampleTENETMultiAssayExperiment <-
    TENET.ExperimentHub::exampleTENETMultiAssayExperiment()

## Use the example dataset to perform the survival analysis. Because
## jenksBreaksGroupCount is specified, the highProportion and lowProportion
## arguments are ignored.
returnValue <- step7TopGenesSurvival(
    TENETMultiAssayExperiment = exampleTENETMultiAssayExperiment,
    jenksBreaksGroupCount = 3
)
\dontshow{\}) # examplesIf}
}
