% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/HiCool.R, R/utils.R
\name{HiCool}
\alias{HiCool}
\alias{importHiCoolFolder}
\alias{getHiCoolArgs}
\alias{getHicStats}
\title{Processing Hi-C paired-end fastq files in R}
\usage{
HiCool(
  r1 = "~/repos/tinyMapper/tests/testHiC_R1.fq.gz",
  r2 = "~/repos/tinyMapper/tests/testHiC_R2.fq.gz",
  genome = "R64-1-1",
  restriction = "DpnII,HinfI",
  binning = NULL,
  iterative = TRUE,
  balancing_args = " --min-nnz 10 --mad-max 5 ",
  threads = 1L,
  exclude_chr = "Mito|chrM|MT",
  output = "HiCool",
  keep_bam = FALSE,
  build_report = TRUE,
  scratch = tempdir()
)

importHiCoolFolder(output, hash, resolution = NULL)

getHiCoolArgs(log)

getHicStats(log)
}
\arguments{
\item{r1}{Path to fastq file (R1 read)}

\item{r2}{Path to fastq file (R2 read)}

\item{genome}{Reference genome to map the reads to, provided either
as a fasta file (in which case the bowtie2 index will be automatically
generated), or as a prefix to a bowtie2 index (e.g. \code{mm10} for
\code{mm10.*.bt2} files). Genome can also be a unique ID for the following
references: \code{hg38}, \code{mm10}, \code{dm6}, \code{R64-1-1}, \code{GRZc10}, \code{WBcel235},
\code{Galgal4}.}

\item{restriction}{Restriction enzyme(s) used in HiC (Default: "DpnII,HinfI")}

\item{binning}{First resolution used to bin the final mcool file
(Default: 10000 for \code{hg38} and \code{mm10}, 1000 for \code{dm6}, \code{R64-1-1}, ...)}

\item{iterative}{Should the read mapping be performed iteratively?
(Default: TRUE)}

\item{balancing_args}{Balancing arguments for cooler.
See \code{cooler} documentation \href{https://cooler.readthedocs.io/en/latest/cli.html#cooler-balance}{here}
for a list of all available balancing arguments.
These defaults match those used by the 4DN consortium.}

\item{threads}{Number of CPUs used for parallelization. (Default: 1)}

\item{exclude_chr}{Chromosomes excluded from the final .mcool file. This will
not affect the pairs file. (Default: "Mito|chrM|MT")}

\item{output}{Output folder used by HiCool.}

\item{keep_bam}{Should the bam files be kept? (Default: FALSE)}

\item{build_report}{Should an automated report be computed? (Default: TRUE)}

\item{scratch}{Path to temporary directory where processing will take place.
(Default: \code{tempdir()})}

\item{hash}{Unique 6-letter ID used to identify files from a specific
HiCool processing run.}

\item{resolution}{Resolution used to import the mcool file}

\item{log}{Path to log file generated by hicstuff/hicool}
}
\value{
A \code{CoolFile} object with prefilled \code{pairsFile} and \code{metadata} slots.
}
\description{
\code{HiCool::HiCool()} automatically processes paired-end HiC sequencing files
by performing the following steps:
\enumerate{
\item Automatically setting up an appropriate conda environment using basilisk;
\item Mapping the reads to the provided genome reference using \code{hicstuff} and filtering out irrelevant pairs;
\item Filtering the resulting pairs file to remove unwanted chromosomes (e.g. chrM);
\item Binning the filtered pairs into a cool file at a chosen resolution;
\item Generating a multi-resolution mcool file;
\item Normalizing matrices at each resolution by iterative correction using cooler.
}

The filtering strategy used by \code{hicstuff} is described in Cournac et al., BMC Genomics 2012.
}
\section{HiCool utils}{

\itemize{
\item \code{importHiCoolFolder(output, hash)} automatically finds the different processed files
associated with a specific \code{HiCool::HiCool()} processing hash ID.
\item \code{getHiCoolArgs()} parses the log file generated by \code{HiCool::HiCool()} during
processing to recover which arguments were used.
\item \code{getHicStats()} parses the log file generated by \code{HiCool::HiCool()} during
processing to recover pre-computed stats about pair numbers, filtering
thresholds, etc.
}
}

\examples{
r1 <- HiContactsData::HiContactsData(sample = 'yeast_wt', format = 'fastq_R1')
r2 <- HiContactsData::HiContactsData(sample = 'yeast_wt', format = 'fastq_R2')
hcf <- HiCool(r1, r2, genome = 'R64-1-1', output = './HiCool/')
hcf
getHiCoolArgs(metadata(hcf)$log)
getHicStats(metadata(hcf)$log)
readLines(metadata(hcf)$log)
}
