% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/MsBackend.R, R/MsBackendDataFrame-functions.R,
%   R/MsBackendDataFrame.R, R/MsBackendHdf5Peaks-functions.R,
%   R/MsBackendMemory-functions.R, R/MsBackendMemory.R,
%   R/MsBackendMzR-functions.R
\name{MsBackend}
\alias{MsBackend}
\alias{class:MsBackend}
\alias{MsBackend-class}
\alias{MsBackendDataFrame-class}
\alias{MsBackendMzR-class}
\alias{[,MsBackend-method}
\alias{uniqueMsLevels,MsBackend-method}
\alias{MsBackendMemory-class}
\alias{supportsSetBackend}
\alias{backendBpparam}
\alias{backendInitialize}
\alias{backendParallelFactor}
\alias{backendParallelFactor,MsBackendMzR-method}
\alias{backendParallelFactor,MsBackendHdf5Peaks-method}
\alias{dataStorageBasePath}
\alias{dataStorageBasePath,MsBackendMzR-method}
\alias{dataStorageBasePath<-}
\alias{dataStorageBasePath<-,MsBackendMzR-method}
\alias{extractByIndex}
\alias{msLeveL<-,MsBackend-method}
\alias{backendRequiredSpectraVariables}
\alias{backendRequiredSpectraVariables,MsBackend-method}
\alias{backendBpparam,MsBackend-method}
\alias{backendInitialize,MsBackend-method}
\alias{backendMerge,list-method}
\alias{backendMerge,MsBackend-method}
\alias{backendParallelFactor,MsBackend-method}
\alias{export,MsBackend-method}
\alias{acquisitionNum,MsBackend-method}
\alias{peaksData,MsBackend-method}
\alias{peaksVariables,MsBackend-method}
\alias{cbind2,MsBackend,dataframeOrDataFrameOrmatrix-method}
\alias{centroided,MsBackend-method}
\alias{centroided<-,MsBackend-method}
\alias{collisionEnergy,MsBackend-method}
\alias{collisionEnergy<-,MsBackend-method}
\alias{dataOrigin,MsBackend-method}
\alias{dataOrigin<-,MsBackend-method}
\alias{dataStorage,MsBackend-method}
\alias{dataStorage<-,MsBackend-method}
\alias{dropNaSpectraVariables,MsBackend-method}
\alias{extractByIndex,MsBackend,ANY-method}
\alias{extractByIndex,MsBackend,missing-method}
\alias{filterAcquisitionNum,MsBackend-method}
\alias{filterDataOrigin,MsBackend-method}
\alias{filterDataStorage,MsBackend-method}
\alias{filterEmptySpectra,MsBackend-method}
\alias{filterIsolationWindow,MsBackend-method}
\alias{filterMsLevel,MsBackend-method}
\alias{filterPolarity,MsBackend-method}
\alias{filterPrecursorMzRange,MsBackend-method}
\alias{filterPrecursorMz,MsBackend-method}
\alias{filterPrecursorMzValues,MsBackend-method}
\alias{filterPrecursorCharge,MsBackend-method}
\alias{filterPrecursorScan,MsBackend-method}
\alias{filterRanges,MsBackend-method}
\alias{filterRt,MsBackend-method}
\alias{filterValues,MsBackend-method}
\alias{intensity,MsBackend-method}
\alias{intensity<-,MsBackend-method}
\alias{ionCount,MsBackend-method}
\alias{isCentroided,MsBackend-method}
\alias{isEmpty,MsBackend-method}
\alias{isolationWindowLowerMz,MsBackend-method}
\alias{isolationWindowLowerMz<-,MsBackend-method}
\alias{isolationWindowTargetMz,MsBackend-method}
\alias{isolationWindowTargetMz<-,MsBackend-method}
\alias{isolationWindowUpperMz,MsBackend-method}
\alias{isolationWindowUpperMz<-,MsBackend-method}
\alias{isReadOnly,MsBackend-method}
\alias{length,MsBackend-method}
\alias{msLevel,MsBackend-method}
\alias{msLevel<-,MsBackend-method}
\alias{mz,MsBackend-method}
\alias{mz<-,MsBackend-method}
\alias{lengths,MsBackend-method}
\alias{polarity,MsBackend-method}
\alias{polarity<-,MsBackend-method}
\alias{precScanNum,MsBackend-method}
\alias{precursorCharge,MsBackend-method}
\alias{precursorIntensity,MsBackend-method}
\alias{precursorMz,MsBackend-method}
\alias{precursorMz<-,MsBackend-method}
\alias{peaksData<-,MsBackend-method}
\alias{reset,MsBackend-method}
\alias{rtime,MsBackend-method}
\alias{rtime<-,MsBackend-method}
\alias{scanIndex,MsBackend-method}
\alias{selectSpectraVariables,MsBackend-method}
\alias{smoothed,MsBackend-method}
\alias{smoothed<-,MsBackend-method}
\alias{spectraData,MsBackend-method}
\alias{spectraData<-,MsBackend-method}
\alias{spectraNames,MsBackend-method}
\alias{spectraNames<-,MsBackend-method}
\alias{spectraVariables,MsBackend-method}
\alias{split,MsBackend,ANY-method}
\alias{supportsSetBackend,MsBackend-method}
\alias{tic,MsBackend-method}
\alias{$,MsBackend-method}
\alias{$<-,MsBackend-method}
\alias{[[,MsBackend-method}
\alias{[[<-,MsBackend-method}
\alias{dataStorageBasePath,MsBackend-method}
\alias{dataStorageBasePath<-,MsBackend-method}
\alias{longForm,MsBackend-method}
\alias{MsBackendDataFrame}
\alias{backendInitialize,MsBackendDataFrame-method}
\alias{MsBackendHdf5Peaks}
\alias{MsBackendMemory}
\alias{backendInitialize,MsBackendMemory-method}
\alias{MsBackendMzR}
\title{Mass spectrometry data backends}
\usage{
\S4method{backendBpparam}{MsBackend}(object, BPPARAM = bpparam())

\S4method{backendInitialize}{MsBackend}(object, ...)

\S4method{backendMerge}{list}(object, ...)

\S4method{backendMerge}{MsBackend}(object, ...)

\S4method{backendParallelFactor}{MsBackend}(object, ...)

\S4method{export}{MsBackend}(object, ...)

\S4method{acquisitionNum}{MsBackend}(object)

\S4method{peaksData}{MsBackend}(object, columns = c("mz", "intensity"))

\S4method{peaksVariables}{MsBackend}(object)

\S4method{cbind2}{MsBackend,dataframeOrDataFrameOrmatrix}(x, y = data.frame(), ...)

\S4method{centroided}{MsBackend}(object)

\S4method{centroided}{MsBackend}(object) <- value

\S4method{collisionEnergy}{MsBackend}(object)

\S4method{collisionEnergy}{MsBackend}(object) <- value

\S4method{dataOrigin}{MsBackend}(object)

\S4method{dataOrigin}{MsBackend}(object) <- value

\S4method{dataStorage}{MsBackend}(object)

\S4method{dataStorage}{MsBackend}(object) <- value

\S4method{dropNaSpectraVariables}{MsBackend}(object)

\S4method{extractByIndex}{MsBackend,ANY}(object, i)

\S4method{extractByIndex}{MsBackend,missing}(object, i)

\S4method{filterAcquisitionNum}{MsBackend}(object, n, file, ...)

\S4method{filterDataOrigin}{MsBackend}(object, dataOrigin = character())

\S4method{filterDataStorage}{MsBackend}(object, dataStorage = character())

\S4method{filterEmptySpectra}{MsBackend}(object, ...)

\S4method{filterIsolationWindow}{MsBackend}(object, mz = numeric(), ...)

\S4method{filterMsLevel}{MsBackend}(object, msLevel = integer())

\S4method{filterPolarity}{MsBackend}(object, polarity = integer())

\S4method{filterPrecursorMzRange}{MsBackend}(object, mz = numeric())

\S4method{filterPrecursorMz}{MsBackend}(object, mz = numeric())

\S4method{filterPrecursorMzValues}{MsBackend}(object, mz = numeric(), ppm = 20, tolerance = 0)

\S4method{filterPrecursorCharge}{MsBackend}(object, z = integer())

\S4method{filterPrecursorScan}{MsBackend}(object, acquisitionNum = integer(), f = dataOrigin(object))

\S4method{filterRanges}{MsBackend}(
  object,
  spectraVariables = character(),
  ranges = numeric(),
  match = c("all", "any")
)

\S4method{filterRt}{MsBackend}(object, rt = numeric(), msLevel. = integer())

\S4method{filterValues}{MsBackend}(
  object,
  spectraVariables = character(),
  values = numeric(),
  ppm = 0,
  tolerance = 0,
  match = c("all", "any")
)

\S4method{intensity}{MsBackend}(object)

\S4method{intensity}{MsBackend}(object) <- value

\S4method{ionCount}{MsBackend}(object)

\S4method{isCentroided}{MsBackend}(object, ...)

\S4method{isEmpty}{MsBackend}(x)

\S4method{isolationWindowLowerMz}{MsBackend}(object)

\S4method{isolationWindowLowerMz}{MsBackend}(object) <- value

\S4method{isolationWindowTargetMz}{MsBackend}(object)

\S4method{isolationWindowTargetMz}{MsBackend}(object) <- value

\S4method{isolationWindowUpperMz}{MsBackend}(object)

\S4method{isolationWindowUpperMz}{MsBackend}(object) <- value

\S4method{isReadOnly}{MsBackend}(object)

\S4method{length}{MsBackend}(x)

\S4method{msLevel}{MsBackend}(object)

\S4method{msLevel}{MsBackend}(object) <- value

\S4method{mz}{MsBackend}(object)

\S4method{mz}{MsBackend}(object) <- value

\S4method{lengths}{MsBackend}(x, use.names = FALSE)

\S4method{polarity}{MsBackend}(object)

\S4method{polarity}{MsBackend}(object) <- value

\S4method{precScanNum}{MsBackend}(object)

\S4method{precursorCharge}{MsBackend}(object)

\S4method{precursorIntensity}{MsBackend}(object)

\S4method{precursorMz}{MsBackend}(object)

\S4method{precursorMz}{MsBackend}(object, ...) <- value

\S4method{peaksData}{MsBackend}(object) <- value

\S4method{reset}{MsBackend}(object)

\S4method{rtime}{MsBackend}(object)

\S4method{rtime}{MsBackend}(object) <- value

\S4method{scanIndex}{MsBackend}(object)

\S4method{selectSpectraVariables}{MsBackend}(object, spectraVariables = spectraVariables(object))

\S4method{smoothed}{MsBackend}(object)

\S4method{smoothed}{MsBackend}(object) <- value

\S4method{spectraData}{MsBackend}(object, columns = spectraVariables(object))

\S4method{spectraData}{MsBackend}(object) <- value

\S4method{spectraNames}{MsBackend}(object)

\S4method{spectraNames}{MsBackend}(object) <- value

\S4method{spectraVariables}{MsBackend}(object)

\S4method{split}{MsBackend,ANY}(x, f, drop = FALSE, ...)

\S4method{supportsSetBackend}{MsBackend}(object, ...)

\S4method{tic}{MsBackend}(object, initial = TRUE)

\S4method{[}{MsBackend}(x, i, j, ..., drop = FALSE)

\S4method{$}{MsBackend}(x, name)

\S4method{$}{MsBackend}(x, name) <- value

\S4method{[[}{MsBackend}(x, i, j, ...)

\S4method{[[}{MsBackend}(x, i, j, ...) <- value

\S4method{uniqueMsLevels}{MsBackend}(object, ...)

\S4method{dataStorageBasePath}{MsBackend}(object)

\S4method{dataStorageBasePath}{MsBackend}(object) <- value

\S4method{longForm}{MsBackend}(object, columns = spectraVariables(object))

MsBackendDataFrame()

\S4method{backendInitialize}{MsBackendDataFrame}(object, data, peaksVariables = c("mz", "intensity"), ...)

MsBackendHdf5Peaks()

MsBackendMemory()

\S4method{backendInitialize}{MsBackendMemory}(object, data, peaksVariables = c("mz", "intensity"), ...)

MsBackendMzR()
}
\arguments{
\item{object}{Object extending \code{MsBackend}.}

\item{BPPARAM}{for \code{backendBpparam()}: parameter object from the
\code{BiocParallel} package defining the parallel processing setup.
Defaults to \code{BPPARAM = bpparam()}. See \code{\link[BiocParallel:register]{BiocParallel::bpparam()}}
for more information.}

\item{...}{Additional arguments.}

\item{columns}{For \code{spectraData()} accessor: optional \code{character} with
column names (spectra variables) that should be included in the
returned \code{DataFrame}. By default, all columns are returned.
For \code{peaksData()} accessor: optional \code{character} with requested columns
in the individual \code{matrix} of the returned \code{list}. Defaults to
\code{peaksVariables(object)} and depends on what \emph{peaks variables} the
backend provides.
For \code{longForm()}: the spectra and peaks variables that should be included
in the returned \code{data.frame}. Defaults to \code{spectraVariables(object)}
and is thus the union of spectra and peaks variables.}

\item{x}{Object extending \code{MsBackend}.}

\item{y}{For \code{cbind2()}: A \code{data.frame} or \code{DataFrame} with the
spectra variables to be added to the backend. The number of rows of \code{y}
and their order have to match the number of spectra and their order
in \code{x}.}

\item{value}{replacement value for \verb{<-} methods. See individual
method description or expected data type.}

\item{i}{For \code{[}: \code{integer}, \code{logical} or \code{character} to subset the object.}

\item{n}{for \code{filterAcquisitionNum()}: \code{integer} with the acquisition
numbers to filter for.}

\item{file}{For \code{filterFile()}: index or name of the file(s) to which the
data should be subsetted. For \code{export()}: \code{character} of length 1 or
equal to the number of spectra.}

\item{dataOrigin}{For \code{filterDataOrigin()}: \code{character} to define which
spectra to keep.
For \code{filterAcquisitionNum()}: optionally specify if filtering should
occur only for spectra of selected \code{dataOrigin}.}

\item{dataStorage}{For \code{filterDataStorage()}: \code{character} to define which
spectra to keep.
For \code{filterAcquisitionNum()}: optionally specify if filtering should
occur only for spectra of selected \code{dataStorage}.}

\item{mz}{For \code{filterIsolationWindow()}: \code{numeric(1)} with the m/z value to
filter the object. For \code{filterPrecursorMzRange()}: \code{numeric(2)} with the
lower and upper m/z boundary. For \code{filterPrecursorMzValues()}: \code{numeric}
with the m/z value(s) to filter the object.}

\item{msLevel}{\code{integer} defining the MS level of the spectra to which the
function should be applied. For \code{filterMsLevel()}: the MS level to which
\code{object} should be subsetted.}

\item{polarity}{For \code{filterPolarity()}: \code{integer} specifying the polarity to
subset \code{object}.}

\item{ppm}{For \code{filterPrecursorMzValues()}: \code{numeric(1)} with the
m/z-relative maximal acceptable difference for a m/z to be considered
matching. See \code{\link[MsCoreUtils:matching]{MsCoreUtils::closest()}} for details.
For \code{filterValues()}: \code{numeric} of any length allowing to define
a maximal accepted difference between user input \code{values} and the
\code{spectraVariables} values.  If it is not equal to the length of the
value provided with parameter \code{spectraVariables}, \code{ppm[1]} will be
recycled.}

\item{tolerance}{For \code{filterPrecursorMzValues()}: \code{numeric(1)} with the
maximal absolute acceptable difference for a m/z value to be considered
matching. See \code{\link[MsCoreUtils:matching]{MsCoreUtils::closest()}} for details.
For \code{filterValues()}: \code{numeric}
accepted tolerance between the \code{values} and the spectra variables.
Defaults to \code{tolerance = 0}. If it is not equal to the length of the
value provided with parameter \code{spectraVariables}, \code{tolerance[1]} will
be recycled.}

\item{z}{For \code{filterPrecursorCharge()}: \code{integer()} with the precursor
charges to be used as filter.}

\item{acquisitionNum}{for \code{filterPrecursorScan()}: \code{integer} with the
acquisition number of the spectra to which the object should be
subsetted.}

\item{f}{\code{factor} defining the grouping to split \code{x}. See \code{\link[=split]{split()}}. For
\code{filterPrecursorScan()}: factor defining from which original data files
the spectra derive to avoid selecting spectra from different
samples/files. Defaults to \code{f = dataOrigin(object)}.}

\item{spectraVariables}{For \code{selectSpectraVariables()}: \code{character} with the
names of the spectra variables to which the backend should be subsetted.
For \code{filterRanges()} and \code{filterValues()}: \code{character} vector specifying
the column(s) from \code{spectraData(object)} on which to filter the data and
that correspond to the names of the spectra variables that should be
used for the filtering.}

\item{ranges}{for \code{filterRanges()}: A \code{numeric} vector of paired values
(upper and lower boundary) that define the ranges to filter the \code{object}.
These paired values need to be in the same order as the
\code{spectraVariables} parameter (see below).}

\item{match}{For \code{filterRanges()} and \code{filterValues()}: \code{character(1)}
defining whether the condition has to match for all provided
\code{ranges}/\code{values} (\code{match = "all"}; the default), or for any of them
(\code{match = "any"}) for spectra to be retained.}

\item{rt}{for \code{filterRt()}: \code{numeric(2)} defining the retention time range
to be used to subset/filter \code{object}.}

\item{msLevel.}{same as \code{msLevel} above.}

\item{values}{For \code{filterValues()}: A \code{numeric} vector that defines the
values to filter the \code{object}. \code{values} needs to be of the same length as
parameter \code{spectraVariables} and in the same order.}

\item{use.names}{For \code{lengths()}: whether spectrum names should be used.}

\item{drop}{For \code{[}: not considered.}

\item{initial}{For \code{tic()}: \code{logical(1)} whether the initially
reported total ion current should be reported, or whether the
total ion current should be (re)calculated on the actual data
(\code{initial = FALSE}).}

\item{j}{For \code{[}: not supported.}

\item{name}{For \code{$} and \verb{$<-}: the name of the spectra variable to return
or set.}

\item{data}{For \code{backendInitialize()}: \code{DataFrame} with spectrum
metadata/data. This parameter can be empty for \code{MsBackendMzR} backends
but needs to be provided for \code{MsBackendDataFrame} backends.}

\item{peaksVariables}{For \code{backendInitialize()} for \code{MsBackendMemory}:
\code{character} specifying which of the columns of the provided \code{data}
contain \emph{peaks variables} (i.e. information for individual mass
peaks). Defaults to \code{peaksVariables = c("mz", "intensity")}. \code{"mz"}
and \code{"intensity"} should \strong{always} be specified.}
}
\value{
See documentation of respective function.
}
\description{
Note that the classes described here are not meant to be used
directly by the end-users and the material in this man page is
aimed at package developers.

\code{MsBackend} is a virtual class that defines what each different
backend needs to provide. \code{MsBackend} objects provide access to
mass spectrometry data. Such backends can be classified into
\emph{in-memory} or \emph{on-disk} backends, depending on where the data, i.e.
spectra (m/z and intensities) and spectra annotation (MS level,
charge, polarity, ...) are stored.

Typically, in-memory backends keep all data in memory ensuring fast
data access, while on-disk backends store (parts of) their data on
disk and retrieve it on demand.

The \emph{Backend functions and implementation notes for new backend
classes} section documents the API that a backend must implement.

Currently available backends are:
\itemize{
\item \code{MsBackendMemory} and \code{MsBackendDataFrame}: store all data in memory. The
\code{MsBackendMemory} is optimized for accessing and processing the peak data
(i.e. the numerical matrices with the m/z and intensity values) while the
\code{MsBackendDataFrame} keeps all data in a \code{DataFrame}.
\item \code{MsBackendMzR}: stores the m/z and intensities on-disk in raw
data files (typically \code{mzML} or \code{mzXML}) and the spectra
annotation information (header) in memory in a \code{DataFrame}. This
backend requires the \code{mzR} package.
\item \code{MsBackendHdf5Peaks}: stores the m/z and intensities on-disk in custom hdf5
data files and the remaining spectra variables in memory (in a
\code{DataFrame}). This backend requires the \code{rhdf5} package.
}

See below for more details about individual backends.
}
\section{Implementation notes}{


Backends extending \code{MsBackend} \strong{must} implement all of its methods (listed
above). Developers of new \code{MsBackend}s should follow the
\code{MsBackendMemory} implementation. To ensure that a new implementation
conforms to the \code{MsBackend} definition, developers should include the test
suites provided by this package in their unit test setup. For that a variable
\code{be} should be created in the package's \code{"testthat.R"} file that represents
a (initialized) instance of the developed backend. Then the path to the
test suites should be defined with
\code{test_suite <- system.file("test_backends", "test_MsBackend", package = "Spectra")} followed by \code{test_dir(test_suite)} to run all test
files in that directory. Individual unit test files could be run with
\code{test_file(file.path(test_suite, "test_spectra_variables.R"), stop_on_failure = TRUE)} (note that without \code{stop_on_failure = TRUE} tests
would fail silently). Adding this code to the package's \code{"testthat.R"} file
ensures that all tests checking the validity of an \code{MsBackend} instance
defined in the \code{Spectra} package are also run on the newly developed backend
class.

The \code{MsBackend} defines the following slots:
\itemize{
\item \verb{@readonly}: \code{logical(1)} whether the backend supports writing/replacing
of m/z or intensity values.
}



Backends extending \code{MsBackend} \strong{must} implement all of its methods (listed
above). Developers of new \code{MsBackend}s should follow the
\code{MsBackendDataFrame} implementation.

The \code{\link[=MsBackendCached]{MsBackendCached()}} backend provides a caching mechanism to allow
\emph{read only} backends to add or change spectra variables. This
backend shouldn't be used on its own, but is meant to be extended. See
\code{\link[=MsBackendCached]{MsBackendCached()}} for details.

The \code{MsBackend} defines the following slots:
\itemize{
\item \verb{@readonly}: \code{logical(1)} whether the backend supports writing/replacing
of m/z or intensity values.
}
}

\section{Backend functions}{


New backend classes \strong{must} extend the base \code{MsBackend} class and will have to
implement some of the following methods (see the \code{MsBackend} vignette for
detailed description and examples):
\itemize{
\item \code{[}: subset the backend. Only subsetting by element (\emph{row}/\code{i}) is
allowed. Parameter \code{i} should support \code{integer} indices and \code{logical}
and should throw an error if \code{i} is out of bounds. The
\code{MsCoreUtils::i2index} could be used to check the input \code{i}.
For \code{i = integer()} an empty backend should be returned. Implementation
of this method is optional, as the default calls the \code{extractByIndex()}
method (which has to be implemented as the main subsetting method).
\item \code{$}, \verb{$<-}: access or set/add a single spectrum variable (column) in the
backend. Using a \code{value} of \code{NULL} should allow deleting the specified
spectra variable. An error should be thrown if the spectra variable is not
available.
\item \code{[[}, \verb{[[<-}: access or set/add a single spectrum variable (column) in the
backend. The default implementation uses \code{$}, thus these methods don't have
to be implemented for new classes extending \code{MsBackend}.
\item \code{acquisitionNum()}: returns the acquisition number of each
spectrum. Returns an \code{integer} of length equal to the number of
spectra (with \code{NA_integer_} if not available).
\item \code{backendBpparam()}: return the parallel processing setup supported by
the backend class. This function can be used by any higher
level function to evaluate whether the provided parallel processing
setup (or the default one returned by \code{bpparam()}) is supported
by the backend. Backends not supporting parallel processing (e.g.
because they contain a connection to a database that can not be
shared across processes) should extend this method to return only
\code{SerialParam()} and hence disable parallel processing for (most)
methods and functions. See also \code{backendParallelFactor()} for a
function to provide a preferred splitting of the backend for parallel
processing.
\item \code{backendInitialize()}: initialises the backend. This method is
supposed to be called right after creating an instance of the
backend class and should prepare the backend (e.g. set the data
for the memory backend or read the spectra header data for the
\code{MsBackendMzR} backend). Parameters can be defined freely for each
backend, depending on what is needed to initialize the backend. It
is however suggested to also support a parameter \code{data} that can be
used to submit the full spectra data as a \code{DataFrame} to the
backend. This would allow the backend to be also usable for the
\code{\link[=setBackend]{setBackend()}} function from \code{Spectra}. Note that eventually (for
\emph{read-only} backends) also the \code{supportsSetBackend} method would need
to be implemented to return \code{TRUE}.
The \code{backendInitialize()} method has also to ensure to correctly set
spectra variable \code{dataStorage}.
\item \code{backendMerge()}: merges (combines) \code{MsBackend} objects into a single
instance. All objects to be merged have to be of the same type (e.g.
\code{\link[=MsBackendDataFrame]{MsBackendDataFrame()}}).
\item \code{backendParallelFactor()}: returns a \code{factor} defining an optimal
(preferred) way how the backend can be split for parallel processing
used for all peak data accessor or data manipulation functions.
The default implementation returns a factor of length 0 (\code{factor()})
providing thus no default splitting. \code{backendParallelFactor()} for
\code{MsBackendMzR} on the other hand returns \code{factor(dataStorage(object))}
hence suggesting to split the object by data file.
\item \code{backendRequiredSpectraVariables()}: returns a \code{character} with spectra
variable names that are mandatory for a specific backend. The default
returns an empty \code{character()}. The implementation for \code{MsBackendMzR}
returns \code{c("dataStorage", "scanIndex")} as these two spectra variables
are required to load the MS data on-the-fly. This method needs only to
be implemented if a backend requires specific variables to be defined.
\item \code{cbind2()}: allows appending multiple new spectra variables to the
backend at once. The values for the new spectra variables have to
be in the same order as the spectra in \code{x}. Replacing existing spectra
variables is not supported through this function. For a more controlled
way of adding spectra variables, the \code{joinSpectraData()} should be used.
\item \code{centroided()}, \verb{centroided<-}: gets or sets the centroiding
information of the spectra. \code{centroided()} returns a \code{logical}
vector of length equal to the number of spectra with \code{TRUE} if a
spectrum is centroided, \code{FALSE} if it is in profile mode and \code{NA}
if it is undefined. See also \code{isCentroided()} for estimating from
the spectrum data whether the spectrum is centroided.  \code{value}
for \verb{centroided<-} is either a single \code{logical} or a \code{logical} of
length equal to the number of spectra in \code{object}.
\item \code{collisionEnergy()}, \verb{collisionEnergy<-}: gets or sets the
collision energy for all spectra in \code{object}. \code{collisionEnergy()}
returns a \code{numeric} with length equal to the number of spectra
(\code{NA_real_} if not present/defined), \verb{collisionEnergy<-} takes a
\code{numeric} of length equal to the number of spectra in \code{object}.
\item \code{dataOrigin()}: gets a \code{character} of length equal to the number of
spectra in \code{object} with the \emph{data origin} of each spectrum. This could
e.g. be the mzML file from which the data was read.
\item \code{dataStorage()}: gets a \code{character} of length equal to the number of
spectra in \code{object} with the data storage of each spectrum. Note that
missing values (\code{NA_character_}) are not supported for \code{dataStorage}.
\item \code{dataStorageBasePath()}, \verb{dataStorageBasePath<-}: gets or sets the
common \emph{base} path of the directory containing all data files. If
supported, the function is expected to return (or accept) a \code{character} of
length 1. Most backends (such as for example the \code{MsBackendMemory}) will
not support this function and \code{dataStorageBasePath()} will return
\code{NA_character_}. For \code{MsBackendMzR}, this function allows to get or
change the path to the directory containing the original data files, which
is required if e.g. a serialized \code{MsBackendMzR} instance gets copied to
another computer or file system.
\item \code{dropNaSpectraVariables()}: removes spectra variables (i.e. columns in the
object's \code{spectraData}) that contain only missing values (\code{NA}). Note that
while columns with only \code{NA}s are removed, a \code{spectraData()} call after
\code{dropNaSpectraVariables()} might still show columns containing \code{NA} values
for \emph{core} spectra variables.
\item \code{export()}: exports data from a \code{Spectra} class to a file. This method is
called by the \verb{export,Spectra} method that passes itself as a second
argument to the function. The \verb{export,MsBackend} implementation is thus
expected to take a \code{Spectra} class as second argument from which all data
is exported. Taking data from a \code{Spectra} class ensures that also all
eventual data manipulations (cached in the \code{Spectra}'s lazy evaluation
queue) are applied prior to export - this would not be possible with only a
\link{MsBackend} class. An example implementation is the \code{export()} method
for the \code{MsBackendMzR} backend that supports export of the data in
\emph{mzML} or \emph{mzXML} format. See the documentation for the \code{MsBackendMzR}
class below for more information.
\item \code{extractByIndex()}: function to subset a backend to selected elements
defined by the provided index. Similar to \code{[}, this method should allow
extracting (or to subset) the data in any order. In contrast to \code{[},
however, \code{i} is expected to be an \code{integer} (while \code{[} should also
support \code{logical} and eventually \code{character}). While being apparently
redundant to \code{[}, this method avoids package namespace errors/problems
that can result in implementations of \code{[} being not found by R (which
can happen sometimes in parallel processing using the
\code{\link[BiocParallel:SnowParam-class]{BiocParallel::SnowParam()}}). This method is used internally by \code{Spectra}
to extract/subset its backend. Implementation of this method is mandatory.
\item \code{filterAcquisitionNum()}: filters the object keeping only spectra matching
the provided acquisition numbers (argument \code{n}). If \code{dataOrigin} or
\code{dataStorage} is also provided, \code{object} is subsetted to the spectra with
an acquisition number equal to \code{n} \strong{in spectra with matching dataOrigin
or dataStorage values} retaining all other spectra.
\item \code{filterDataOrigin()}: filters the object retaining spectra matching the
provided \code{dataOrigin}. Parameter \code{dataOrigin} has to be of type
\code{character} and needs to match exactly the data origin value of the
spectra to subset.
\code{filterDataOrigin()} should return the data ordered by the provided
\code{dataOrigin} parameter, i.e. if \code{dataOrigin = c("2", "1")} was provided,
the spectra in the resulting object should be ordered accordingly (first
spectra from data origin \code{"2"} and then from \code{"1"}).
Implementation of this method is optional since a default implementation
for \code{MsBackend} is available.
\item \code{filterDataStorage()}: filters the object retaining spectra matching the
provided \code{dataStorage}. Parameter \code{dataStorage} has to be of type
\code{character} and needs to match exactly the data storage value of the
spectra to subset.
\code{filterDataStorage()} should return the data ordered by the provided
\code{dataStorage} parameter, i.e. if \code{dataStorage = c("2", "1")} was provided,
the spectra in the resulting object should be ordered accordingly (first
spectra from data storage \code{"2"} and then from \code{"1"}).
Implementation of this method is optional since a default implementation
for \code{MsBackend} is available.
\item \code{filterEmptySpectra()}: removes empty spectra (i.e. spectra without peaks).
Implementation of this method is optional since a default implementation
for \code{MsBackend} is available.
\item \code{filterFile()}: retains data of files matching the file index or file name
provided with parameter \code{file}.
\item \code{filterIsolationWindow()}: retains spectra that contain \code{mz} in their
isolation window m/z range (i.e. with an \code{isolationWindowLowerMz} \code{<=} \code{mz}
and \code{isolationWindowUpperMz} \code{>=} \code{mz}).
Implementation of this method is optional since a default implementation
for \code{MsBackend} is available.
\item \code{filterMsLevel()}: retains spectra of MS level \code{msLevel}.
Implementation of this method is optional since a default implementation
for \code{MsBackend} is available.
\item \code{filterPolarity()}: retains spectra of polarity \code{polarity}.
Implementation of this method is optional since a default implementation
for \code{MsBackend} is available.
\item \code{filterPrecursorMzRange()} (previously \code{filterPrecursorMz}): retains
spectra with a precursor m/z within the provided m/z range.
Implementation of this method is optional since a default implementation
for \code{MsBackend} is available.
\item \code{filterPrecursorMzValues()}: retains spectra with a precursor m/z matching
any of the provided m/z values (given \code{ppm} and \code{tolerance}).
Implementation of this method is optional since a default implementation
for \code{MsBackend} is available.
\item \code{filterPrecursorCharge()}: retains spectra with the defined precursor
charge(s).
Implementation of this method is optional since a default implementation
for \code{MsBackend} is available.
\item \code{filterPrecursorScan()}: retains parent (e.g. MS1) and children scans (e.g.
MS2) of acquisition number \code{acquisitionNum}. Parameter \code{f} is supposed to
define the origin of the spectra (i.e. the original data file) to ensure
related spectra from the same file/sample are selected and retained.
Implementation of this method is optional since a default implementation
for \code{MsBackend} is available.
\item \code{filterRanges()}: allows filtering of the \code{Spectra} object based on user
defined \emph{numeric} ranges (parameter \code{ranges}) for one or more available
spectra variables in object (spectra variable names can be specified with
parameter \code{spectraVariables}). Spectra for which the value of a spectra
variable is within its defined range are retained. If multiple
ranges/spectra variables are defined, the \code{match} parameter can be used
to specify whether all conditions (\code{match = "all"}; the default) or if
any of the conditions must match (\code{match = "any"}; all spectra for which
values are within any of the provided ranges are retained).
Implementation of this method is optional since a default implementation
for \code{MsBackend} is available.
\item \code{filterRt()}: retains spectra of MS level \code{msLevel} with retention times
within (\code{>=}) \code{rt[1]} and (\code{<=}) \code{rt[2]}. The filter is applied to all
spectra if no MS level is specified (the default, \code{msLevel. = integer()}).
Implementation of this method is optional since a default implementation
for \code{MsBackend} is available.
\item \code{filterValues()}: allows filtering of the \code{Spectra} object based on
similarities of \emph{numeric} values of one or more \code{spectraVariables(object)}
(parameter \code{spectraVariables}) to provided values (parameter \code{values})
given acceptable differences (parameters \code{tolerance} and \code{ppm}). If multiple
values/spectra variables are defined, the \code{match} parameter can be used
to specify whether all conditions (\code{match = "all"}; the default) or if
any of the conditions must match (\code{match = "any"}; all spectra for which
values are within any of the provided ranges are retained).
Implementation of this method is optional since a default implementation
for \code{MsBackend} is available.
\item \code{intensity()}: gets the intensity values from the spectra. Returns
a \code{\link[IRanges:AtomicList-class]{IRanges::NumericList()}} of \code{numeric} vectors (intensity values for each
spectrum). The length of the \code{list} is equal to the number of
\code{spectra} in \code{object}.
\item \verb{intensity<-}: replaces the intensity values. \code{value} has to be a \code{list}
(or \code{\link[IRanges:AtomicList-class]{IRanges::NumericList()}}) of length equal to the number of spectra
and the number of values within each list element identical to the
number of peaks in each spectrum (i.e. the \code{lengths(x)}). Note that just
writeable backends support this method.
\item \code{ionCount()}: returns a \code{numeric} with the sum of intensities for
each spectrum. If the spectrum is empty (see \code{isEmpty()}),
\code{NA_real_} is returned.
\item \code{isCentroided()}: a heuristic approach assessing if the spectra in
\code{object} are in profile or centroided mode. The function takes
the \code{qtl} th quantile top peaks, then calculates the difference
between adjacent m/z values and returns \code{TRUE} if the first
quartile is greater than \code{k}. (See \code{Spectra:::.peaks_is_centroided} for
the code.)
\item \code{isEmpty()}: checks whether a spectrum in \code{object} is empty
(i.e. does not contain any peaks). Returns a \code{logical} vector of
length equal to the number of spectra.
\item \code{isolationWindowLowerMz()}, \verb{isolationWindowLowerMz<-}: gets or sets the
lower m/z boundary of the isolation window.
\item \code{isolationWindowTargetMz()}, \verb{isolationWindowTargetMz<-}: gets or sets the
target m/z of the isolation window.
\item \code{isolationWindowUpperMz()}, \verb{isolationWindowUpperMz<-}: gets or sets the
upper m/z boundary of the isolation window.
\item \code{isReadOnly()}: returns a \code{logical(1)} whether the backend is \emph{read
only} or does allow also to write/update data.
\item \code{length()}: returns the number of spectra in the object.
\item \code{lengths()}: gets the number of peaks (m/z-intensity values) per
spectrum.  Returns an \code{integer} vector (length equal to the
number of spectra). For empty spectra, \code{0} is returned.
\item \code{longForm()}: extract the MS data in \emph{long form}, i.e., as a \code{data.frame}
with columns being requested spectra and peak variables and one row per
mass peak. Parameter \code{columns} can be used to specify the columns (i.e.,
spectra or peaks variables) that should be returned. The default is
\code{columns = spectraVariables(object)} and \strong{all} spectra and peak variables
are returned. It is strongly suggested to extract only selected columns
and not the full data to avoid potential out-of-memory problems.
Implementation of this method is optional as a default implementation for
\code{MsBackend} is available which converts the \code{DataFrame} returned by
\code{spectraData()} into long form.
\item \code{msLevel()}: gets the spectra's MS level. Returns an \code{integer}
vector (of length equal to the number of spectra) with the MS
level for each spectrum (or \code{NA_integer_} if not available).
\item \verb{msLevel<-}: replaces the spectra's MS level.
\item \code{mz()}: gets the mass-to-charge ratios (m/z) from the
spectra. Returns a \code{\link[IRanges:AtomicList-class]{IRanges::NumericList()}} of length equal to the
number of spectra, each element a \code{numeric} vector with the m/z values of
one spectrum.
\item \verb{mz<-}: replaces the m/z values. \code{value} has to be a \code{list} of length equal
to the number of spectra and the number of values within each list element
identical to the number of peaks in each spectrum (i.e. the
\code{lengths(x)}). Note that just writeable backends support this method.
\item \code{polarity()}, \verb{polarity<-}: gets or sets the polarity for each
spectrum.  \code{polarity()} returns an \code{integer} vector (length equal
to the number of spectra), with \code{0} and \code{1} representing negative
and positive polarities, respectively. \verb{polarity<-} expects an
integer vector of length 1 or equal to the number of spectra.
\item \code{precursorCharge()}, \code{precursorIntensity()}, \code{precursorMz()},
\code{precScanNum()}, \code{precAcquisitionNum()}: get the charge (\code{integer}),
intensity (\code{numeric}), m/z (\code{numeric}), scan index (\code{integer})
and acquisition number (\code{integer}) of the precursor for MS level
2 and above spectra from the object. Returns a vector of length equal to
the number of spectra in \code{object}. \code{NA} are reported for MS1
spectra or if no precursor information is available.
\item \code{peaksData()} returns a \code{list} with the spectra's peak data, i.e. m/z and
intensity values or other \emph{peak variables}. The length of the list is
equal to the number of spectra in \code{object}. Each element of the list has
to be a two-dimensional array (\code{matrix} or \code{data.frame})
with columns depending on the provided \code{columns} parameter (by default
\code{"mz"} and \code{"intensity"}, but depends on the backend's available
\code{peaksVariables}). For an empty spectrum, a \code{matrix} (\code{data.frame}) with
0 rows and columns according to \code{columns} is returned. The optional
parameter \code{columns}, if supported by the backend, allows to define which
peak variables should be returned in the \code{numeric} peak \code{matrix}. As a
default \code{c("mz", "intensity")} should be used.
\item \verb{peaksData<-} replaces the peak data (m/z and intensity values) of the
backend. This method expects a \code{list} of two dimensional arrays (\code{matrix}
or \code{data.frame}) with columns representing the peak variables. All
existing peaks data is expected to be replaced with these new values. The
length of the \code{list} has to match the number of spectra of \code{object}.
Note that only writeable backends need to support this method.
\item \code{peaksVariables()}: lists the available variables for mass peaks. Default
peak variables are \code{"mz"} and \code{"intensity"} (which all backends need to
support and provide), but some backends might provide additional variables.
All these variables are expected to be returned (if requested) by the
\code{peaksData()} function.
\item \code{reset()} a backend (if supported). This method will be called on the
backend by the \verb{reset,Spectra} method that is supposed to restore the data
to its original state (see \verb{reset,Spectra} for more details). The function
returns the \emph{reset} backend. The default implementation for \code{MsBackend}
returns the backend as-is.
\item \code{rtime()}, \verb{rtime<-}: gets or sets the retention times for each
spectrum (in seconds). \code{rtime()} returns a \code{numeric} vector (length equal
to the number of spectra) with the retention time for each spectrum.
\verb{rtime<-} expects a numeric vector with length equal to the
number of spectra.
\item \code{scanIndex()}: returns an \code{integer} vector with the \emph{scan index}
for each spectrum. This represents the relative index of the
spectrum within each file. Note that this can be different to the
\code{acquisitionNum()} of the spectrum which is the index of the
spectrum as reported in the mzML file.
\item \code{selectSpectraVariables()}: reduces the information within the backend to
the selected spectra variables. It is suggested to \strong{not} remove values
for the \code{"dataStorage"} variable, since this might be required for some
backends to work properly (such as the \code{MsBackendMzR}).
\item \code{smoothed()}, \verb{smoothed<-}: gets or sets whether a spectrum is
\emph{smoothed}. \code{smoothed()} returns a \code{logical} vector of length equal
to the number of spectra. \verb{smoothed<-} takes a \code{logical} vector
of length 1 or equal to the number of spectra in \code{object}.
\item \code{spectraData()}, \verb{spectraData<-}: gets or sets general spectrum
metadata (annotation, also called header).  \code{spectraData()} returns
a \code{DataFrame}, \verb{spectraData<-} expects a \code{DataFrame} with the same number
of rows as there are spectra in \code{object}. Note that \code{spectraData()} has to
return the full data, i.e. also the m/z and intensity values (as a \code{list}
or \code{SimpleList} in columns \code{"mz"} and \code{"intensity"}). See also
\code{\link[=fillCoreSpectraVariables]{fillCoreSpectraVariables()}} for a function that can \emph{complete} a spectra
data data frame with eventually missing \emph{core} spectra variables.
\item \code{spectraNames()}: returns a \code{character} vector with the names of
the spectra in \code{object} or \code{NULL} if not set. \verb{spectraNames<-} allows to
set spectra names (if the object is not read-only).
\item \code{spectraVariables()}: returns a \code{character} vector with the
available spectra variables (columns, fields or attributes)
available in \code{object}. This should return \strong{all} spectra variables which
are present in \code{object}, also \code{"mz"} and \code{"intensity"} (which are by
default not returned by the \verb{spectraVariables,Spectra} method).
\item \code{split()}: splits the backend into a \code{list} of backends (depending on
parameter \code{f}). The default method for \code{MsBackend} uses \code{\link[=split.default]{split.default()}},
thus backends extending \code{MsBackend} don't necessarily need to implement
this method.
\item \code{supportsSetBackend()}: whether a \code{MsBackend} supports the \code{Spectra}
\code{setBackend()} function. For a \code{MsBackend} to support \code{setBackend()} it
needs to have a parameter called \code{data} in its \code{backendInitialize()} method
that supports receiving all spectra data as a \code{DataFrame} from another
backend and to initialize the backend with this data. In general
\emph{read-only} backends do not support \code{setBackend()} hence, the default
implementation of \code{supportsSetBackend()} returns \code{!isReadOnly(object)}. If
a read-only backend supports \code{setBackend()} and can be initialized with a
\code{DataFrame}, an implementation of this method that returns \code{TRUE} can be
defined for that backend (see also the \code{MsBackend} vignette for details and
examples).
\item \code{tic()}: gets the total ion current/count (sum of signal of a
spectrum) for all spectra in \code{object}. By default, the value
reported in the original raw data file is returned. For an empty
spectrum, \code{NA_real_} is returned.
\item \code{uniqueMsLevels()}: gets the unique MS levels of all spectra in \code{object}.
The default implementation calls \code{unique(msLevel(object))} but more
efficient implementations could be defined for specific backends.
}
}

\section{Subsetting and merging backend classes}{


Backend classes must support (implement) the \code{[} method to subset the object.
This method should only support subsetting by spectra (rows, \code{i}) and has
to return a \code{MsBackend} class.

Backends extending \code{MsBackend} should also implement the \code{backendMerge()}
method to support combining backend instances (only backend classes of the
same type should be merged). Merging should follow the following rules:
\itemize{
\item The whole spectrum data of the various objects should be merged. The
resulting merged object should contain the union of the individual objects'
spectra variables (columns/fields), with eventually missing variables in
one object being filled with \code{NA}.
}
}

\section{In-memory data backends}{
 \code{MsBackendMemory} and \code{MsBackendDataFrame}:

The \code{MsBackendMemory} and \code{MsBackendDataFrame} objects keep all MS data in
memory and are thus ideal for fast data processing. Due to their large memory
footprint they are however not suited for large scale experiments. The two
backends store the data differently. The \code{MsBackendDataFrame} stores
all data in a \code{DataFrame} and thus supports also S4-classes as
spectra variables. Also, separate access to m/z or intensity values (i.e.
using the \code{mz()} and \code{intensity()} methods) is faster for the
\code{MsBackendDataFrame}. The \code{MsBackendMemory} on the other hand, due to the
way the data is organized internally, provides much faster access to the
full peak data (i.e. the numerical matrices of m/z and intensity values).
Also subsetting and access to any spectra variable (except \code{"mz"} and
\code{"intensity"}) is fastest for the \code{MsBackendMemory}.

Thus, for most use cases, the \code{MsBackendMemory} provides a higher
performance and flexibility than the \code{MsBackendDataFrame} and should thus be
preferred. See also issue
\href{https://github.com/rformassspectrometry/Spectra/issues/246}{246} for a
performance comparison.

New objects can be created with the \code{MsBackendMemory()} and
\code{MsBackendDataFrame()} function, respectively. Both backends can be
subsequently initialized with the \code{backendInitialize()} method, taking a
\code{DataFrame} (or \code{data.frame}) with the (full) MS data as first parameter
\code{data}. The second parameter \code{peaksVariables} allows to define which columns
in \code{data} contain \emph{peak variables} such as the m/z and intensity values of
individual peaks per spectrum. The default for this parameter is
\code{peaksVariables = c("mz", "intensity")}. Note that providing only one of
\code{"mz"} or \code{"intensity"} is not supported: if provided, both need to be
present in the data frame. Alternatively, the function also supports a data
frame without m/z and intensity values, in which case a \code{Spectra} without
mass peaks is created.

Suggested columns of this \code{DataFrame} are:
\itemize{
\item \code{"msLevel"}: \code{integer} with MS levels of the spectra.
\item \code{"rt"}: \code{numeric} with retention times of the spectra.
\item \code{"acquisitionNum"}: \code{integer} with the acquisition number of the spectrum.
\item \code{"scanIndex"}: \code{integer} with the index of the scan/spectrum within the
\emph{mzML}/\emph{mzXML}/\emph{CDF} file.
\item \code{"dataOrigin"}: \code{character} defining the \emph{data origin}.
\item \code{"dataStorage"}: \code{character} indicating grouping of spectra in different
e.g. input files. Note that missing values are not supported.
\item \code{"centroided"}: \code{logical} whether the spectrum is centroided.
\item \code{"smoothed"}: \code{logical} whether the spectrum was smoothed.
\item \code{"polarity"}: \code{integer} with the polarity information of the spectra.
\item \code{"precScanNum"}: \code{integer} specifying the index of the (MS1) spectrum
containing the precursor of a (MS2) spectrum.
\item \code{"precursorMz"}: \code{numeric} with the m/z value of the precursor.
\item \code{"precursorIntensity"}: \code{numeric} with the intensity value of the
precursor.
\item \code{"precursorCharge"}: \code{integer} with the charge of the precursor.
\item \code{"collisionEnergy"}: \code{numeric} with the collision energy.
\item \code{"mz"}: \code{\link[IRanges:AtomicList-class]{IRanges::NumericList()}} of \code{numeric} vectors representing the
m/z values for each spectrum.
\item \code{"intensity"}: \code{\link[IRanges:AtomicList-class]{IRanges::NumericList()}} of \code{numeric} vectors
representing the intensity values for each spectrum.
}

Additional columns are allowed too.

The \code{peaksData()} function for \code{MsBackendMemory} and \code{MsBackendDataFrame}
returns a \code{list} of \code{numeric} \code{matrix} by default (with parameter
\code{columns = c("mz", "intensity")}). If other peak variables are requested,
a \code{list} of \code{data.frame} is returned (ensuring m/z and intensity values
are always \code{numeric}).
}

\section{\code{MsBackendMzR}, on-disk MS data backend}{


The \code{MsBackendMzR} keeps only a limited amount of data in memory,
while the spectra data (m/z and intensity values) are fetched from
the raw files on-demand. This backend uses the \code{mzR} package for
data import and retrieval and hence requires that package to be
installed. Also, it can only be used to import and represent data
stored in \emph{mzML}, \emph{mzXML} and \emph{CDF} files.

The \code{MsBackendMzR} backend extends the \code{MsBackendDataFrame} backend using
its \code{DataFrame} to keep spectra variables (except m/z and intensity) in
memory.

New objects can be created with the \code{MsBackendMzR()} function which
can be subsequently filled with data by calling \code{backendInitialize()}
passing the file names of the input data files with argument \code{files}.

This backend provides an \code{export()} method to export data from a \code{Spectra} in
\emph{mzML} or \emph{mzXML} format. The definition of the function is:

\code{export(object, x, file = tempfile(), format = c("mzML", "mzXML"), copy = FALSE)}

The parameters are:
\itemize{
\item \code{object}: an instance of the \code{MsBackendMzR} class.
\item \code{x}: the \link{Spectra} object to be exported.
\item \code{file}: \code{character} with the (full) output file name(s). Should be
of length 1 or equal to \code{length(x)}. If a single file is specified, all
spectra are exported to that file. Alternatively it is possible to specify
for each spectrum in \code{x} the name of the file to which it should be
exported (and hence \code{file} has to be of length equal to \code{length(x)}).
\item \code{format}: \code{character(1)}, either \code{"mzML"} or \code{"mzXML"} defining the output
file format.
\item \code{copy}: \code{logical(1)} whether general file information should be copied from
the original MS data files. This only works if \code{x} uses a \code{MsBackendMzR}
backend and if \code{dataOrigin(x)} contains the original MS data file names.
\item \code{BPPARAM}: parallel processing settings.
}

See examples in \link{Spectra} or the vignette for more details and
examples.

The \code{MsBackendMzR} ignores parameter \code{columns} of the \code{peaksData()}
function and returns \strong{always} m/z and intensity values.
}

\section{\code{MsBackendHdf5Peaks}, on-disk MS data backend}{


The \code{MsBackendHdf5Peaks} keeps, similar to the \code{MsBackendMzR}, peak data
(i.e. m/z and intensity values) in custom data files (in HDF5 format) on
disk while the remaining spectra variables are kept in memory. This backend
supports updating and writing of manipulated peak data to the data files.

New objects can be created with the \code{MsBackendHdf5Peaks()} function which
can be subsequently filled with data by calling the object's
\code{backendInitialize()} method passing the desired file names of the HDF5 data
files along with the spectra variables in form of a \code{DataFrame} (see
\code{MsBackendDataFrame} for the expected format). An optional parameter
\code{hdf5path} allows to specify the folder where the HDF5 data files should be
stored to. If provided, this is added as the path to the submitted file
names (parameter \code{files}).

By default \code{backendInitialize()} will store all peak data into a single HDF5
file whose name has to be provided with the parameter \code{files}. To store peak
data across several HDF5 files \code{data} has to contain a column
\code{"dataStorage"} that defines the grouping of spectra/peaks into files: peaks
for spectra with the same value in \code{"dataStorage"} are saved into the same
HDF5 file. If parameter \code{files} is omitted, the value in \code{dataStorage} is
used as file name (replacing any file ending with \code{".h5"}). To specify the
file names, \code{files}' length has to match the number of unique elements in
\code{"dataStorage"}.

For details see examples on the \code{\link[=Spectra]{Spectra()}} help page.

The \code{MsBackendHdf5Peaks} ignores parameter \code{columns} of the \code{peaksData()}
function and returns \strong{always} m/z and intensity values.
}

\examples{

## The MsBackend class is a virtual class and cannot be instantiated
## directly. Below we define a new backend class extending this virtual
## class.
MsBackendDummy <- setClass("MsBackendDummy", contains = "MsBackend")
MsBackendDummy()

## This class inherits now all methods from `MsBackend`, all of which
## however throw an error. These methods would have to be implemented
## for the new backend class. The call is wrapped in `try()` so that the
## expected error does not stop the example.
try(mz(MsBackendDummy()))

## See `MsBackendDataFrame` as a reference implementation for a backend
## class (in the *R/MsBackendDataFrame.R* file).

## MsBackendDataFrame
##
## The `MsBackendDataFrame` uses a `S4Vectors::DataFrame` to store all MS
## data. Below we create such a backend by passing a `DataFrame` with all
## data to it. The `mz` and `intensity` columns are lists with one numeric
## vector per spectrum (three spectra in total).
data <- DataFrame(msLevel = c(1L, 2L, 1L), scanIndex = 1:3)
data$mz <- list(c(1.1, 1.2, 1.3), c(1.4, 54.2, 56.4, 122.1), c(15.3, 23.2))
data$intensity <- list(c(3, 2, 3), c(45, 100, 12.2, 1), c(123, 12324.2))

## Backends are supposed to be created with their specific constructor
## function.
be <- MsBackendDataFrame()

be

## The `backendInitialize()` method initializes the backend filling it with
## data. This method can take any parameters needed for the backend to
## get loaded with the data (e.g. a file name from which to load the data,
## a database connection or, in this case, a data frame containing the data).
be <- backendInitialize(be, data)

be

## Data can be accessed with the accessor methods.
msLevel(be)

mz(be)

## Even if no data was provided for all spectra variables, its accessor
## methods are supposed to return a value.
precursorMz(be)

## The `peaksData()` method is supposed to return the peaks of the spectra as
## a `list`.
peaksData(be)

## List available peaks variables.
peaksVariables(be)

## Use columns to extract specific peaks variables. Below we extract m/z and
## intensity values, but in reversed order to the default.
peaksData(be, columns = c("intensity", "mz"))

## List available spectra variables (i.e. spectrum metadata).
spectraVariables(be)

## Extract precursor m/z, rtime, MS level spectra variables.
spectraData(be, c("precursorMz", "rtime", "msLevel"))

## MsBackendMemory
##
## The `MsBackendMemory` uses a more efficient internal data organization
## and allows also adding arbitrary additional peaks variables (annotations).
## Below we thus add a column "peak_ann" with arbitrary names/ids for each
## peak and add the name of this column to the `peaksVariables` parameter
## of the `backendInitialize()` method (in addition to `"mz"` and
## `"intensity"` that should **always** be specified).
data$peak_ann <- list(c("a", "", "d"), c("", "d", "e", "f"), c("h", "i"))
be <- backendInitialize(MsBackendMemory(), data,
    peaksVariables = c("mz", "intensity", "peak_ann"))
be

spectraVariables(be)

## peak_ann is also listed as a peaks variable.
peaksVariables(be)

## The additional peaks variable can be accessed using the `peaksData()`
## function.
peaksData(be, "peak_ann")

## The $<- method can be used to replace values of an existing peaks
## variable. It is important that the number of elements matches the
## number of peaks per spectrum (here 3, 4 and 2 peaks).
be$peak_ann <- list(1:3, 1:4, 1:2)

## A peaks variable can again be removed by setting it to NULL.
be$peak_ann <- NULL

peaksVariables(be)
}
\author{
Johannes Rainer, Sebastian Gibb, Laurent Gatto, Philippine Louail
}
