% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/spectralMatching.R
\name{spectralMatching}
\alias{spectralMatching}
\title{Spectral matching for LC-MS/MS datasets}
\usage{
spectralMatching(
  q_dbPth,
  l_dbPth = NA,
  q_purity = NA,
  q_ppmProd = 10,
  q_ppmPrec = 5,
  q_raThres = NA,
  q_pol = NA,
  q_instrumentTypes = NA,
  q_instruments = NA,
  q_sources = NA,
  q_spectraTypes = c("av_all", "inter"),
  q_pids = NA,
  q_rtrange = c(NA, NA),
  q_spectraFilter = TRUE,
  q_xcmsGroups = NA,
  q_accessions = NA,
  l_purity = NA,
  l_ppmProd = 10,
  l_ppmPrec = 5,
  l_raThres = NA,
  l_pol = "positive",
  l_instrumentTypes = NA,
  l_instruments = NA,
  l_sources = NA,
  l_spectraTypes = NA,
  l_pids = NA,
  l_rtrange = c(NA, NA),
  l_spectraFilter = FALSE,
  l_xcmsGroups = NA,
  l_accessions = NA,
  usePrecursors = TRUE,
  raW = 0.5,
  mzW = 2,
  rttol = NA,
  q_dbType = "sqlite",
  q_dbName = NA,
  q_dbHost = NA,
  q_dbUser = NA,
  q_dbPass = NA,
  q_dbPort = NA,
  l_dbType = "sqlite",
  l_dbName = NA,
  l_dbHost = NA,
  l_dbUser = NA,
  l_dbPass = NA,
  l_dbPort = NA,
  cores = 1,
  updateDb = FALSE,
  copyDb = FALSE,
  outPth = "sm_result.sqlite"
)
}
\arguments{
\item{q_dbPth}{character; Path of the database of queries that will be searched against the library spectra. Generated from createDatabase}

\item{l_dbPth}{character; path to library spectral SQLite database. Defaults to msPurityData package data.}

\item{q_purity}{character; Precursor ion purity threshold for the query spectra}

\item{q_ppmProd}{numeric; ppm tolerance for query product}

\item{q_ppmPrec}{numeric; ppm tolerance for query precursor}

\item{q_raThres}{numeric; Relative abundance threshold for query spectra}

\item{q_pol}{character; Polarity of query spectra ('positive', 'negative', NA).}

\item{q_instrumentTypes}{vector; Instrument types for query spectra.}

\item{q_instruments}{vector; Instruments for query spectra (note that this is used in combination with q_instrumentTypes - any
spectra matching either q_instrumentTypes or q_instruments will be used).}

\item{q_sources}{vector; Sources of query spectra (e.g. massbank, hmdb).}

\item{q_spectraTypes}{character; Spectra types of query spectra to perform spectral matching e.g. ('scan', 'av_all', 'intra', 'inter')}

\item{q_pids}{vector; pids for query spectra (correspond to column 'pid' in s_peak_meta)}

\item{q_rtrange}{vector; retention time range (in secs) of query spectra, first value minimum time and second value max e.g. c(0, 10) is between 0 and 10 seconds}

\item{q_spectraFilter}{boolean; For query spectra, if prior filtering performed with msPurity, flag peaks will be removed from spectral matching}

\item{q_xcmsGroups}{vector; XCMS group ids for query spectra}

\item{q_accessions}{vector; accession ids to filter query spectra}

\item{l_purity}{character; Precursor ion purity threshold for the library spectra (uses interpolated purity - inPurity)}

\item{l_ppmProd}{numeric; ppm tolerance for library product}

\item{l_ppmPrec}{numeric; ppm tolerance for library precursor}

\item{l_raThres}{numeric; Relative abundance threshold for library spectra}

\item{l_pol}{character; Polarity of library spectra ('positive', 'negative', NA)}

\item{l_instrumentTypes}{vector; Instrument types for library spectra.}

\item{l_instruments}{vector; Instruments for library spectra (note that this is used in combination with l_instrumentTypes - any
spectra matching either l_instrumentTypes or l_instruments will be used).}

\item{l_sources}{vector; Sources of library spectra (e.g. massbank, hmdb).}

\item{l_spectraTypes}{vector; Spectra type of library spectra to perform spectral matching with e.g. ('scan', 'av_all', 'intra', 'inter')}

\item{l_pids}{vector; pids for library spectra (correspond to column 'pid' in s_peak_meta)}

\item{l_rtrange}{vector; retention time range (in secs) of library spectra, first value minimum time and second value max e.g. c(0, 10) is between 0 and 10 seconds}

\item{l_spectraFilter}{boolean; For library spectra, if prior filtering performed with msPurity, flag peaks will be removed from spectral matching}

\item{l_xcmsGroups}{vector; XCMS group ids for library spectra}

\item{l_accessions}{vector; accession ids to filter library spectra}

\item{usePrecursors}{boolean; If TRUE spectra will be filtered by similarity of precursors based on ppm range defined by l_ppmPrec and q_ppmPrec}

\item{raW}{numeric; Relative abundance weight for spectra (default to 0.5 as determined by massbank for ESI data)}

\item{mzW}{numeric; mz weight for spectra (default to 2 as determined by massbank for ESI data)}

\item{rttol}{numeric; Tolerance in time range between the library and query spectra retention time}

\item{q_dbType}{character; Query database type for compound database can be either (sqlite, postgres or mysql)}

\item{q_dbName}{character; Query database name (only applicable for postgres and mysql)}

\item{q_dbHost}{character; Query database host (only applicable for postgres and mysql)}

\item{q_dbUser}{character; Query database user (only applicable for postgres and mysql)}

\item{q_dbPass}{character; Query database pass - Note this is not secure! use with caution (only applicable for postgres and mysql)}

\item{q_dbPort}{character; Query database port (only applicable for postgres and mysql)}

\item{l_dbType}{character; Library database type for compound database can be either (sqlite, postgres or mysql)}

\item{l_dbName}{character; Library database name (only applicable for postgres and mysql)}

\item{l_dbHost}{character; Library database host (only applicable for postgres and mysql)}

\item{l_dbUser}{character; Library database user (only applicable for postgres and mysql)}

\item{l_dbPass}{character; Library database pass - Note this is not secure! use with caution (only applicable for postgres and mysql)}

\item{l_dbPort}{character; Library database port (only applicable for postgres and mysql)}

\item{cores}{numeric; Number of cores to use}

\item{updateDb}{boolean; Update the Query SQLite database with the results}

\item{copyDb}{boolean; If updating the database - perform on a copy rather than the original query database}

\item{outPth}{character; If copying the database - the path of the new database file}
}
\value{
Returns a list containing the following elements

\strong{q_dbPth}

Path of the query database (this will have been updated with the annotation results if updateDb argument used)

\strong{xcmsMatchedResults}

If the query spectra had XCMS based chromatographic peaks tables (e.g c_peak_groups, c_peaks) in the sqlite database - it will
be possible to summarise the matches for each XCMS grouped feature. The dataframe contains the following columns
\itemize{
\item lpid - id in database of library spectra
\item qpid - id in database of query spectra
\item dpc - dot product cosine of the match
\item rdpc - reverse dot product cosine of the match
\item cdpc - composite dot product cosine of the match
\item mcount - number of matching peaks
\item allcount - total number of peaks across both query and library spectra
\item mpercent - percentage of matching peaks across both query and library spectra
\item library_rt - retention time of library spectra
\item query_rt - retention time of query spectra
\item rtdiff - difference between library and query retention time
\item library_precursor_mz - library precursor mz
\item query_precursor_mz - query precursor mz
\item library_precursor_ion_purity - library precursor ion purity
\item query_precursor_ion_purity - query precursor ion purity
\item library_accession - library accession value (unique string or number given to either MoNA or Massbank data entries)
\item library_precursor_type - library precursor type (i.e. adduct)
\item library_entry_name - Name given to the library spectra
\item inchikey - inchikey of the matched library spectra
\item library_source_name - source of the spectra (e.g. massbank, gnps)
\item library_compound_name - name of compound spectra was obtained from
}

\strong{matchedResults}

All matched results from the query spectra to the library spectra. Contains the same columns as above
but without the XCMS details. This table is useful to observe spectral matching results
for all MS/MS spectra irrespective of if they are linked to XCMS MS1 features.

list of database details and dataframe summarising the results for the xcms features
}
\description{
\strong{General}

Perform spectral matching to spectral libraries for an LC-MS/MS dataset.

The spectral matching is performed from a \strong{Query} SQLite spectral-database against a \strong{Library} SQLite spectral-database.

Details of the SQLite schema of the spectral database can be found
\href{https://bioconductor.org/packages/release/bioc/vignettes/msPurity/inst/doc/msPurity-spectral-datatabase-schema.html}{here}.

The query spectral-database in most cases should be the "unknown" spectra database generated by the msPurity
function createDatabase as part of a msPurity-XCMS data processing workflow.

The library spectral-database in most cases should contain the "known" spectra from either public or user generated resources.
The library SQLite database by default contains data from MoNA including Massbank, HMDB, LipidBlast and GNPS.
A larger database can be downloaded from \href{https://github.com/computational-metabolomics/msp2db/releases}{here}.
To create a user generated library SQLite database the following tool can be used to generate a SQLite database
from a collection of MSP files: \href{https://github.com/computational-metabolomics/msp2db/releases}{msp2db}.
It should be noted though, that as long as the schema of the spectral-database is as described \href{https://bioconductor.org/packages/release/bioc/vignettes/msPurity/inst/doc/msPurity-spectral-database-vignette.html}{here}, then any database can be used
for either the library or query -  even allowing for the same database to be used.

The spectral matching functionality has four main components, spectral filtering, spectral alignment, spectral matching,
and summarising the results.

Spectral filtering is simply filtering both the library and query spectra to be searched against (e.g. choosing
the library source, instrument, retention time, precursor PPM tolerance etc).

The spectral alignment stage involves aligning the query peaks to the library peaks. The approach used is similar
to the modified pMatch algorithm described in Zhou et al 2015.

The spectral matching of the aligned spectra is performed against a combined intensity and m/z weighted vector - created for both
the query and library spectra (wq and wl). See below:

\deqn{w=intensity^x * mz^y}

Where x and y represent weight factors, defaults to \emph{x}=0.5 and \emph{y}=2 as per MassBank. These can be adjusted by
the user though.

The aligned weighted vectors are then matched using dot product cosine, reverse dot product cosine and the composite dot product.
See below for dot product cosine equation.

\deqn{dpc = (wq \cdot wl) / (\sqrt{\sum wq^2} * \sqrt{\sum wl^2})}

See the vignettes for more details regarding matching algorithms used.

\strong{Example LC-MS/MS processing workflow}
\itemize{
\item Purity assessments
\itemize{
\item (mzML files) -> purityA -> (pa)
}
\item XCMS processing
\itemize{
\item (mzML files) -> xcms.findChromPeaks -> (optionally) xcms.adjustRtime -> xcms.groupChromPeaks -> (xcmsObj)
\item --- \emph{Older versions of XCMS} --- (mzML files) -> xcms.xcmsSet -> xcms.group -> xcms.retcor -> xcms.group -> (xcmsObj)
}
\item Fragmentation processing
\itemize{
\item (xcmsObj, pa) -> frag4feature -> filterFragSpectra -> averageAllFragSpectra -> createDatabase -> \strong{spectralMatching} -> (sqlite spectral database)
}
}
}
\examples{
#====== XCMS =================================
## Read in MS data
#msmsPths <- list.files(system.file("extdata", "lcms", "mzML",
#           package="msPurityData"), full.names = TRUE, pattern = "MSMS")
#ms_data = readMSData(msmsPths, mode = 'onDisk', msLevel. = 1)

## Find peaks in each file
#cwp <- CentWaveParam(snthresh = 5, noise = 100, ppm = 10, peakwidth = c(3, 30))
#xcmsObj  <- xcms::findChromPeaks(ms_data, param = cwp)

## Optionally adjust retention time
#xcmsObj  <- adjustRtime(xcmsObj , param = ObiwarpParam(binSize = 0.6))

## Group features across samples
#pdp <- PeakDensityParam(sampleGroups = c(1, 1), minFraction = 0, bw = 30)
#xcmsObj <- groupChromPeaks(xcmsObj , param = pdp)

#====== msPurity ============================
#pa  <- purityA(msmsPths)
#pa <- frag4feature(pa = pa, xcmsObj = xcmsObj)
#pa <- filterFragSpectra(pa, allfrag=TRUE)
#pa <- averageAllFragSpectra(pa)
#q_dbPth <- createDatabase(pa, xcmsObj, metadata=list('polarity'='positive','instrument'='Q-Exactive'))
#sm_result <- spectralMatching(q_dbPth, cores=4, l_pol='positive')

# Runnable example: match a small example query database against the library.
# The output database is written into the session temporary directory.
td <- tempdir()
# Locate the example query database file within the msPurity package.
q_dbPth <- system.file("extdata", "tests", "db", "createDatabase_example.sqlite", package="msPurity")

# Build a random output file name: five random capital letters followed by a
# random integer between 1 and 9999, with a .sqlite extension.
rid <- paste0(paste0(sample(LETTERS, 5, TRUE), collapse=""),  paste0(sample(9999, 1, TRUE), collapse=""), ".sqlite")
sm_out_pth <- file.path(td, rid)

# Restrict the query to three XCMS groups and the library to three accessions,
# and use only the av_all query spectra type, on a single core.
# updateDb and copyDb are both TRUE and outPth points at the temporary file,
# so per the argument descriptions the results go into a copy of the query
# database rather than the original.
result <- spectralMatching(q_dbPth, q_xcmsGroups = c(53, 89, 410), cores=1, l_accessions = c('PR100407', 'ML005101', 'CCMSLIB00003740024'),
                          q_spectraTypes = 'av_all',
                          updateDb = TRUE,
                          copyDb = TRUE,
                          outPth = sm_out_pth)


}
