% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/runDuplexDiscoverer.R
\name{runDuplexDiscoverer}
\alias{runDuplexDiscoverer}
\title{Executes all steps of DuplexDiscovereR pipeline}
\usage{
runDuplexDiscoverer(
  data,
  table_type = "",
  junctions_gr = NULL,
  anno_gr = NULL,
  anno_gr_keys = c("gene_id", "gene_name", "gene_type"),
  fafile = NULL,
  df_counts = NULL,
  sample_name = "sample",
  lib_type = "SE",
  min_junction_len = 5,
  max_gap = 50,
  min_arm_ratio = 0.1,
  min_overlap = 10,
  max_sj_shift = 10,
  gap_collapse_similar = 2,
  collapse_n_inter = 5,
  trim_alignments = FALSE,
  trim_length = 40,
  min_arm_len = 9,
  compute_p_values = TRUE
)
}
\arguments{
\item{data}{dataframe-like object with the split reads. Output of Chimeric.out.junction or dataframe with fields defined by bedpe format:
c("chromA","startA",'endA',"chromB",'startB','endB','readname','flag','strandA','strandB', ... )
Alternatively, \code{GInteractions} object}

\item{table_type}{one in c("STAR","bedpe"). Defines the type of the input dataframe.
Ignored if input data is \code{GInteractions}}

\item{junctions_gr}{\pkg{GRanges} object with the splice junction coordinates}

\item{anno_gr}{\pkg{GRanges} object to use for the annotation of the interactions. Optional}

\item{anno_gr_keys}{c() vector with names of metadata fields in anno_gr which will be used for the annotation. Argument passed to
\code{annotateGI()} function. The c('gene_id','gene_name','gene_type') columns in anno_gr are used by default.}

\item{fafile}{path to the genome .fasta file. Used to calculate hybridization energy with \emph{RNADuplex}. Sequence names should correspond to the sequences from which the mapping index was created. Optional}

\item{df_counts}{A two-column dataframe with counts. Counts are used for p-value calculation. The first column should match the 'gene_id' feature in anno_gr. The second column is the respective count. Optional}

\item{sample_name}{A name of the sample, used for assembling the analysis statistics dataframe}

\item{lib_type}{one in c('SE','PE'). Type of the sequencing library. Default is 'SE'}

\item{min_junction_len}{a minimum allowed distance between chimeric arms for the read input.
Reads with the junction closer than \code{min_junction_len} are annotated as '2arm_shot' and not clustered to duplex groups}

\item{max_gap}{Parameter for read clustering. Maximum allowed shift between start and end coordinates of arms for a pair of overlapping chimeric reads.
If the shift is longer than \code{max_gap} for either arm, then total read overlap between those reads is zero.}

\item{min_arm_ratio}{Parameter for read clustering.
If the overlap-to-span ratio for either arm (A or B) for pair of chimeric reads is less than \code{min_arm_ratio}, then the total overlap for this pair is set to zero.}

\item{min_overlap}{Parameter for read clustering. Minimum required overlap for either arm (A or B) for a pair of chimeric reads.}

\item{max_sj_shift}{Maximum shift between either donor and acceptor splice sites and chimeric junction coordinates to count chimeric junction as splice junction}

\item{gap_collapse_similar}{Parameter for read clustering (iterative step). Analogous to the max_gap, but applied \code{collapse_n_inter} times during the iterative merging step.
Reduce this to 1 or 2 to lower RAM usage for clustering the library with many similar reads.}

\item{collapse_n_inter}{Parameter for read clustering (iterative step). Number of iterations to repeat the step of collapsing the highly similar chimeric reads.
Increasing this, e.g., from 0 to 5, reduces clustering time and memory for the libraries with many overlapping reads.}

\item{trim_alignments}{TRUE or FALSE. Whether to trim arms alignments to
'trim_length' nucleotide around chimeric junction}

\item{trim_length}{target size of trimmed alignment}

\item{min_arm_len}{minimum allowed length of the alignment arm.
Read will be dropped if either arm is shorter}

\item{compute_p_values}{TRUE or FALSE. Whether to calculate p-values with the random ligation test}
}
\value{
a list with the following keys
\describe{
\item{\code{duplex_groups}}{ \code{GInteractions} object with chimeric reads clustered into duplex groups }
\item{\code{chimeric_reads}}{ \code{GInteractions} object with non-collapsed chimeric reads }
\item{\code{reads_classes}}{ \code{tbl_df} dataframe parallel to the input dataframe, annotated with read categories and duplex groups }
\item{\code{chimeric_reads_stats}}{ \code{tbl_df} dataframe containing read type classification statistics }
\item{\code{run_stats}}{ \code{tbl_df} dataframe with the time and memory info about the run }
}
}
\description{
Generates GInteractions object with duplex groups from the STAR
Chimeric.out.junction or bedpe file.
Classifies reads, annotates reads by overlap with the gene or transcript
features, calculates p-values and hybridization energies.
Additionally, returns mappings from duplex groups back to genes.
}
\details{
This is a main function to do the initial discovery of the RNA duplexes after
the chimeric read mapping. It wraps the following procedures:
\itemize{
\item Classifies the input reads by the mapping type.
Keeps 2-arm chimeric reads for downstream analysis
\item Compares 2arm duplex reads against provided splice junctions
\item Classifies 2arm duplexes into spurious self-overlapping, splice junction categories
\item Performs clustering of the remaining reads into duplex groups
\itemize{
\item Collapses identically mapped reads
\item Collapses closely located reads, almost identical reads
\item Finds duplex groups throughout the whole data set
}
\item Annotates duplex groups with genomic features if annotation is provided
\item Calculates p-values if gene counts and annotation are provided
\item Calculates hybridization energies if path to the .fasta file is provided
}
}
\examples{

library(DuplexDiscovereR)
# load data
data("RNADuplexesSampleData")
result <- runDuplexDiscoverer(
    data = RNADuplexesRawChimSTAR,
    junctions_gr = SampleSpliceJncGR,
    anno_gr = SampleGeneAnnoGR,
    df_counts = RNADuplexesGeneCounts,
    sample_name = "test clustering",
    fafile = NULL,
    collapse_n_inter = 3,
    lib_type = "SE",
    table_type = "STAR"
)
# see results object
print(result)
# duplex groups
dd_get_duplex_groups(result)
# individual chimeric reads
dd_get_chimeric_reads(result)
# counts of detected read types
dd_get_chimeric_reads_stats(result)
}
\seealso{
\code{\link[=DuplexDiscovererResults]{DuplexDiscovererResults()}}
}
