% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/converters.R
\name{bigSpectronauttoMSstatsFormat}
\alias{bigSpectronauttoMSstatsFormat}
\title{Convert out-of-memory Spectronaut files to MSstats format.}
\usage{
bigSpectronauttoMSstatsFormat(
  input_file,
  output_file_name,
  backend,
  intensity = "F.NormalizedPeakArea",
  filter_by_excluded = FALSE,
  filter_by_identified = FALSE,
  filter_by_qvalue = FALSE,
  qvalue_cutoff = 0.01,
  max_feature_count = 100,
  filter_unique_peptides = FALSE,
  aggregate_psms = FALSE,
  filter_few_obs = FALSE,
  remove_annotation = FALSE,
  calculateAnomalyScores = FALSE,
  anomalyModelFeatures = c(),
  connection = NULL
)
}
\arguments{
\item{input_file}{name of the input text file containing Spectronaut output.}

\item{output_file_name}{name of an output file which will be saved after pre-processing}

\item{backend}{"arrow" or "sparklyr". Option "sparklyr" requires a Spark installation
and a connection to a Spark instance provided in the `connection` parameter.}

\item{intensity}{Name of the Spectronaut column to be used as the intensity.}

\item{filter_by_excluded}{if TRUE, will filter by the `F.ExcludedFromQuantification` column.}

\item{filter_by_identified}{if TRUE, will filter by the `EG.Identified` column.}

\item{filter_by_qvalue}{if TRUE, will filter by the `EG.Qvalue` and `PG.Qvalue` columns.}

\item{qvalue_cutoff}{cutoff which will be used for q-value filtering.}

\item{max_feature_count}{maximum number of features per protein. Features will
be selected based on highest average intensity.}

\item{filter_unique_peptides}{If TRUE, shared peptides will be removed.
Please refer to the `Details` section for additional information.}

\item{aggregate_psms}{If TRUE, multiple measurements per PSM in a Run will
be aggregated (by taking maximum value). Please refer to the `Details` section for additional information.}

\item{filter_few_obs}{If TRUE, features with fewer than 3 observations across runs will be removed.
Please refer to the `Details` section for additional information.}

\item{remove_annotation}{If TRUE, columns BioReplicate and Condition will be removed
to reduce output file size. These will need to be added manually later before
using the dataProcess function. Only applicable to the sparklyr backend.}

\item{calculateAnomalyScores}{If TRUE, will carry anomaly model features through the pipeline.}

\item{anomalyModelFeatures}{Character vector of column names to be carried through the pipeline}

\item{connection}{Connection to a spark instance created with the
`spark_connect` function from `sparklyr` package.}
}
\value{
Either an arrow object or a sparklyr table that can optionally be collected
into memory by using the dplyr::collect function.
}
\description{
Convert out-of-memory Spectronaut files to MSstats format.
}
\examples{
converted_data <- bigSpectronauttoMSstatsFormat(
  system.file("extdata", "spectronaut_input.csv", package = "MSstatsBig"),
  "output_file.csv",
  backend="arrow")
converted_data <- dplyr::collect(converted_data)
head(converted_data)

}
