% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/controlled_geneset_enrichment.r
\name{controlled_geneset_enrichment}
\alias{controlled_geneset_enrichment}
\title{Celltype controlled geneset enrichment}
\usage{
controlled_geneset_enrichment(
  disease_genes,
  functional_genes,
  bg = NULL,
  sct_data,
  sctSpecies = NULL,
  output_species = "human",
  disease_genes_species = NULL,
  functional_genes_species = NULL,
  method = "homologene",
  annotLevel,
  reps = 100,
  controlledCT,
  use_intersect = FALSE,
  verbose = TRUE
)
}
\arguments{
\item{disease_genes}{Array of gene symbols containing the disease gene list.
Does not have to be disease genes. Must be from same species as the single
cell transcriptome dataset.}

\item{functional_genes}{Array of gene symbols containing the functional gene
list. The enrichment of this gene set within the disease_genes is tested.
Must be from same species as the single cell transcriptome dataset.}

\item{bg}{List of gene symbols containing the background gene list
(including hit genes). If \code{bg=NULL},
 an appropriate gene background will be created automatically.}

\item{sct_data}{List generated using \link[EWCE]{generate_celltype_data}.}

\item{sctSpecies}{Species that \code{sct_data} is currently formatted as
(no longer limited to just "mouse" and "human").
See \link[EWCE]{list_species} for all available species.}

\item{output_species}{Species to convert \code{sct_data} and the input gene
lists (\code{disease_genes} and \code{functional_genes}) to
(Default: "human").
See \link[EWCE]{list_species} for all available species.}

\item{disease_genes_species}{Species of the
\code{disease_genes} gene set.}

\item{functional_genes_species}{Species of the
\code{functional_genes} gene set.}

\item{method}{R package to use for gene mapping:
\describe{
 \item{\code{"gprofiler"}}{Slower but more species and genes.}
 \item{\code{"homologene"}}{Faster but fewer species and genes.}
 \item{\code{"babelgene"}}{Faster but fewer species and genes.
 Also gives consensus scores for each gene mapping based on
 several different data sources.}
}}

\item{annotLevel}{An integer indicating which level of \code{sct_data} to
analyse (e.g. \code{1}). This argument has no default and must be supplied.}

\item{reps}{Number of random gene lists to generate (\emph{Default: 100},
but should be >=10,000 for publication-quality results).}

\item{controlledCT}{[Optional] If not NULL, and instead is the name of a
cell type, then the bootstrapping controls for expression within that
cell type.}

\item{use_intersect}{When \code{species1} and \code{species2} are both
different from \code{output_species}, this argument will determine whether 
to use the intersect (\code{TRUE}) or union (\code{FALSE}) of all genes
from \code{species1} and \code{species2}.}

\item{verbose}{Print messages.}
}
\value{
A list containing the following elements:
\itemize{
  \item \code{p_controlled} The probability that functional_genes are
  enriched in disease_genes while controlling for the level of specificity
  in controlledCT
  \item \code{z_controlled} The z-score that functional_genes are enriched
  in disease_genes while controlling for the level of specificity in
  controlledCT
  \item \code{p_uncontrolled} The probability that functional_genes are
  enriched in disease_genes WITHOUT controlling for the level of
  specificity in controlledCT
  \item \code{z_uncontrolled} The z-score that functional_genes are enriched
  in disease_genes WITHOUT controlling for the level of specificity in
  controlledCT
  \item \code{reps} The number of random gene lists that were generated
  \item \code{controlledCT} The name of the cell type that was controlled for
  \item \code{actualOverlap} The number of genes that overlap between
  functional and disease gene sets
}
}
\description{
\code{controlled_geneset_enrichment} tests whether a functional gene set is
still enriched in a disease gene set after controlling for the
disease gene set's enrichment in a particular cell type (the 'controlledCT').
}
\examples{
# See the vignette for more detailed explanations
# Gene set enrichment analysis controlling for cell type expression
# set seed for bootstrap reproducibility
set.seed(12345678)
## load merged dataset from vignette
ctd <- ewceData::ctd()
schiz_genes <- ewceData::schiz_genes()
hpsd_genes <- ewceData::hpsd_genes()
# Use 3 bootstrap lists for speed; for publishable analysis use >=10,000
reps <- 3

res_hpsd_schiz <- EWCE::controlled_geneset_enrichment(
    disease_genes = schiz_genes,
    functional_genes = hpsd_genes,
    sct_data = ctd,
    annotLevel = 1,
    reps = reps,
    controlledCT = "pyramidal CA1"
)
}
