% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils_converters.R
\name{MSstatsPTMSiteLocator}
\alias{MSstatsPTMSiteLocator}
\title{Locate modification site number and amino acid}
\usage{
MSstatsPTMSiteLocator(
  data,
  protein_name_col = "ProteinName",
  unmod_pep_col = "PeptideSequence",
  mod_pep_col = "PeptideModifiedSequence",
  clean_mod = FALSE,
  fasta_file = NULL,
  fasta_protein_name = "header",
  mod_id = "\\\\*",
  localization_scores = FALSE,
  localization_cutoff = 0.75,
  remove_unlocalized_peptides = TRUE,
  terminus_included = FALSE,
  terminus_id = "\\\\.",
  mod_id_is_numeric = FALSE,
  remove_underscores = FALSE,
  remove_other_mods = FALSE,
  bracket = FALSE,
  replace_text = FALSE
)
}
\arguments{
\item{data}{\code{data.table} of enriched experimental run. Must include
\code{ProteinName}, \code{PeptideSequence}, \code{PeptideModifiedSequence}, and (optionally)
\code{Start} columns.}

\item{protein_name_col}{Name of column indicating protein. Default is
\code{ProteinName}.}

\item{unmod_pep_col}{Name of column indicating unmodified peptide sequence. Default
is \code{PeptideSequence}.}

\item{mod_pep_col}{Name of column indicating modified peptide sequence. Default
is \code{PeptideModifiedSequence}.}

\item{clean_mod}{Remove special characters and numbers around modification
name. Default is \code{FALSE}.}

\item{fasta_file}{File path to FASTA file that matches with proteins in
\code{data}. Can be either a string or a \code{data.table} processed with the
\code{tidyFasta()} function. Defaults to \code{NULL}, which can be used if the
peptide number is included in \code{data}.}

\item{fasta_protein_name}{Name of fasta file column that matches with
\code{protein_name_col}. Default is \code{header}.}

\item{mod_id}{String that indicates what amino acid was modified in
\code{PeptideSequence}.}

\item{localization_scores}{Boolean indicating if mod id is a localization
score. If TRUE, \code{mod_id} will be ignored and localization cutoff will be
used to determine sites. Default is FALSE.}

\item{localization_cutoff}{Default is 0.75. Localization probabilities below
the cutoff will be removed. \code{localization_scores} must be TRUE.}

\item{remove_unlocalized_peptides}{Default is TRUE. If \code{localization_scores}
is TRUE and probabilities are below \code{localization_cutoff}, the modification
site cannot be determined. These unlocalized peptides can be
kept or removed. If FALSE, the unlocalized peptides will still be used in
modeling the sites that could be localized.}

\item{terminus_included}{Boolean indicating if the \code{PeptideSequence} includes
the terminus amino acid.}

\item{terminus_id}{String that indicates what the terminus amino acid is.
Default is '.'.}

\item{mod_id_is_numeric}{Boolean indicating if modification identifier is
a number instead of a character (e.g. +80 vs. *).}

\item{remove_underscores}{Boolean indicating if underscores around peptide
exist. These should be removed to properly count where in sequence the
modification occurred.}

\item{remove_other_mods}{Keeping mods that are not of interest can mess up
the amino acid count. Remove them if they are causing issues. Default is
\code{FALSE}.}

\item{bracket}{Bracket type that encompasses PTM (usually \code{[} or \code{(}). Always
pass the opening bracket (there is a function to grab the closing bracket). Default
is FALSE (i.e. no bracket).}

\item{replace_text}{Indicates whether the PTM is noted by text (e.g. \code{Phospho})
and needs to be replaced by an indicator (\code{*}). Default is \code{FALSE}.}
}
\value{
\code{data.table} with site location added into \code{Protein} column.
}
\description{
Locate modification site number and amino acid
}
\examples{
##TODO

}
