% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/find_ep_coenrichment.R
\name{find_ep_coenrichment}
\alias{find_ep_coenrichment}
\title{Find co-enriched motif pairs in enhancer-promoter interactions}
\usage{
find_ep_coenrichment(
  int_raw_data,
  motifs_file,
  motifs_file_matrix_format = c("pfm", "ppm", "pwm"),
  genome_id = c("hg38", "hg19", "mm9", "mm10"),
  identify_ep = TRUE,
  cooccurrence_method = c("count", "score", "match"),
  filter_threshold = 0.4
)
}
\arguments{
\item{int_raw_data}{a \code{\link[GenomicInteractions]{GenomicInteractions}}
object or a data frame with at least six columns:
\tabular{rl}{
  column 1: \tab character; genomic location of interaction anchor 1 -
  chromosome (e.g., \code{"chr3"})\cr
  column 2: \tab integer; genomic location of interaction anchor 1 -
  start coordinate\cr
  column 3: \tab integer; genomic location of interaction anchor 1 -
  end coordinate\cr
  column 4: \tab character; genomic location of interaction anchor 2 -
  chromosome (e.g., \code{"chr3"})\cr
  column 5: \tab integer; genomic location of interaction anchor 2 -
  start coordinate\cr
  column 6: \tab integer; genomic location of interaction anchor 2 -
  end coordinate
}}

\item{motifs_file}{\href{http://jaspar.genereg.net/faq/}{JASPAR format}
matrix file containing multiple motifs to scan for, gz-zipped files allowed}

\item{motifs_file_matrix_format}{type of position-specific scoring matrices
in \code{motifs_file}, valid options include:
\tabular{rl}{
  \code{pfm}: \tab position frequency matrix, elements are absolute
  frequencies, i.e., counts (default)\cr
  \code{ppm}: \tab position probability matrix, elements are probabilities,
  i.e., Laplace smoothing corrected relative frequencies\cr
  \code{pwm}: \tab position weight matrix, elements are log likelihoods
}}

\item{genome_id}{ID of genome assembly interactions in \code{int_raw_data}
were aligned to, valid options include \code{hg19}, \code{hg38}, \code{mm9},
and \code{mm10}, defaults to \code{hg38}}

\item{identify_ep}{logical, set \code{FALSE} if enhancers and promoters
should not be identified based on genomic annotations; in that case,
anchor 1 is assumed to contain promoters and anchor 2 is assumed to contain
enhancers for all interactions in \code{int_raw_data}. Defaults to
\code{TRUE}, i.e., enhancers and promoters of interactions in
\code{int_raw_data} are identified based on genomic annotations and all
interactions which are not between promoters and enhancers are filtered out}

\item{cooccurrence_method}{method for co-occurrence, valid options include:
\tabular{rl}{
  \code{count}: \tab correlation between counts (for each anchor, tally
  positions where motif score > \eqn{5 * 10^{-5}})\cr
  \code{score}: \tab correlation between motif scores (for each anchor, use
  the maximum score over all positions)\cr
  \code{match}: \tab association between motif matches (for each anchor,
  a match is defined if there is at least one position with a motif score
  > \eqn{5 * 10^{-5}})
}
See \code{\link{anchor_pair_enrich}} for details.}

\item{filter_threshold}{fraction of interactions that should contain a
motif for a motif to be considered, see \code{\link{filter_motifs}},
defaults to \code{0.4}}
}
\value{
a list with the following items:
\tabular{rl}{
  \code{int_data}: \tab
  \code{\link[GenomicInteractions]{GenomicInteractions}} object;
  promoter-enhancer interactions\cr
  \code{int_data_motifs}: \tab \code{interactionData} object; return value of
  \code{\link{scan_motifs}}\cr
  \code{filtered_int_data_motifs}: \tab \code{interactionData} object;
  return value of \code{\link{filter_motifs}}\cr
  \code{annotation_pie_chart}: \tab ggplot2 plot; return value of
  \code{\link[GenomicInteractions]{plotInteractionAnnotations}}\cr
  \code{motif_cooccurrence}: \tab \code{interactionData} object; return
  value of \code{\link{anchor_pair_enrich}}
}
}
\description{
Identifies co-enriched pairs of motifs in enhancer-promoter interactions
selected from a data frame of general genomic interactions.

If \code{identify_ep}: Promoters and enhancers are identified
using genomic annotations, where anchors close to promoter annotations
(within 2500 base pairs) are considered promoters and all other anchors are
considered gene-distal enhancers. Only interactions in
\code{int_raw_data} between promoters and enhancers are used for motif
co-enrichment analysis.

If \code{!identify_ep}: Instead of automatically identifying
promoters and enhancers based on genomic annotations, all interactions
in \code{int_raw_data} must be preprocessed in a way that anchor 1 contains
promoters and anchor 2 contains enhancers. Motif
co-enrichment analysis is performed under this assumption.

Calls functions \code{\link{scan_motifs}}, \code{\link{filter_motifs}},
and \code{\link{anchor_pair_enrich}} internally.
}
\examples{
\dontrun{
interactions_file <- system.file("extdata/yy1_interactions.bedpe.gz",
                                 package = "spatzie")
motifs_file <- system.file("extdata/motifs_subset.txt.gz",
                           package = "spatzie")

df <- read.table(gzfile(interactions_file), header = TRUE, sep = "\t")
res <- find_ep_coenrichment(df, motifs_file,
                            motifs_file_matrix_format = "pfm",
                            genome_id = "mm10")
}

}
\author{
Jennifer Hammelman

Konstantin Krismer
}
