% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tabix.R, R/tabix_gr.R, R/tabix_raw.R
\name{tabix}
\alias{tabix}
\alias{tabix_gr}
\alias{tabix_raw}
\title{Query records from tabixed BED files}
\usage{
tabix(bedfiles, regions, aligner = NULL, col.names = NULL, nthreads = NULL)

tabix_gr(
  bedfiles,
  regions,
  aligner = NULL,
  col.names = NULL,
  zero_based = TRUE,
  nthreads = NULL
)

tabix_raw(bedfiles, regions, nthreads = NULL)
}
\arguments{
\item{bedfiles}{The BED files to be queried}

\item{regions}{A vector, data frame or GenomicRanges of genomic regions. See
details.}

\item{aligner}{The aligner used to produce the BED files - one of "biscuit",
"bismark", "bsbolt". Will set the result data.table's column names based on
this argument.}

\item{col.names}{A vector of column names for the data columns of the
result data.table, not including "chr", "start", and "end". Set if your BED
file is not from the supported aligners or is a general BED file.}

\item{nthreads}{Set number of threads to use overriding the
\code{"iscream.threads"} option. See \code{?set_threads} for more information.}

\item{zero_based}{Whether the input BED file has a zero-based start column -
used when converting the result data frame to GenomicRanges.}
}
\value{
\itemize{
\item \code{tabix()}: A data frame
\item \code{tabix_gr()}: A \code{GRanges} object for single files and \code{GRangesList} for
multiple files. When making \code{GRanges}, the 0-based records from BED-files
will be converted to 1-based with
\code{GenomicRanges::makeGRangesFromDataFrame()}. Bismark's coverage files will
not be converted, as they are already 1-based, and each range in the
\code{ranges} slot will cover only one position.
\item \code{tabix_raw()}: A named list of raw strings from the regions in the style
of \code{Rsamtools::scanTabix}
}
}
\description{
Query records from tabixed BED files
}
\details{
\subsection{Query method}{

\emph{iscream} has two methods to query records from BED files:
\itemize{
\item the \emph{tabix} shell executable: fast since its output can be redirected to a
file (which \code{data.table::fread()} can then read) instead of having to
allocate memory and store it during the query
\item \emph{iscream's} tabix implementation, based on the \emph{tabix} executable using
\emph{htslib}, but slower on large queries since it stores the records as they
are found instead of writing to a file. However it's able to store each
region's records independently instead of in a single file and is used in
\code{make_mat()}, \code{make_mat_bsseq()}, and \code{summarize_regions()}.
}

When \emph{iscream} is attached, it checks that the \emph{tabix} executable is
available with \code{Sys.which()} and, if available, sets \code{options("tabix.method" = "shell")}. \code{tabix()} then uses the \emph{tabix} executable to make queries,
except for \code{tabix_raw()}. If \emph{tabix} is not found, \emph{iscream} uses its tabix
implementation. To use only \emph{iscream's} tabix implementation, set
\code{options("tabix.method" = "htslib")}.
}

\subsection{Input region formats}{

The input regions format may be a string vector in the form "chr:start-end",
a data frame with "chr", "start" and "end" columns or a \code{GRanges} object.
Input regions must be 1-based. When using \code{"htslib"} as the query method, if
the input \code{GRanges} object of regions contains any single locus regions
where the start and end positions are the same, \emph{iscream} will notify that
such regions were found and fixed, as \code{chr:start} format strings are invalid
for the htslib API (see \code{?get_granges_string}).
}
}
\examples{
bedfiles <- system.file("extdata", package = "iscream") |>
  list.files(pattern = "[a|b|c|d].bed.gz$", full.names = TRUE)
regions <- c("chr1:1-6", "chr1:7-10", "chr1:11-14")
tabix(bedfiles, regions, col.names = c("beta", "coverage"))
if (require("GenomicRanges", quietly = TRUE)) {
  tabix_gr(bedfiles, regions, col.names = c("beta", "coverage"))
}
tabix_raw(bedfiles, regions)
}
