% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/readGenomesFromMPF.R
\name{readGenomesFromMPF}
\alias{readGenomesFromMPF}
\title{Read tumor genomes from an MPF file (Mutation Position Format).}
\usage{
readGenomesFromMPF(file, numBases=5, type="Shiraishi", trDir=TRUE,
enforceUniqueTrDir=TRUE, 
refGenome=BSgenome.Hsapiens.UCSC.hg19::BSgenome.Hsapiens.UCSC.hg19,
transcriptAnno=
TxDb.Hsapiens.UCSC.hg19.knownGene::TxDb.Hsapiens.UCSC.hg19.knownGene,
verbose=TRUE)
}
\arguments{
\item{file}{(Mandatory) The name of the MPF file (can be compressed with
\code{gzip}).}

\item{numBases}{(Mandatory) Total number of bases (mutated base and
flanking bases) to be used for sequence patterns. Must be odd. Default: 5}

\item{type}{(Mandatory) Signature model or type (\code{"Alexandrov"} or
\code{"Shiraishi"}). Default: \code{"Shiraishi"}}

\item{trDir}{(Mandatory) Specifies whether the transcription direction is
taken into account in the signature model. If so, only mutations within
genomic regions with a defined transcription direction can be considered.
Default: \code{TRUE}}

\item{enforceUniqueTrDir}{(Optional) Used only if \code{trDir} is
\code{TRUE}. If \code{enforceUniqueTrDir} is \code{TRUE} (default), then mutations
which map to a region with multiple overlapping genes with opposing
transcription directions will be excluded from the analysis. If \code{FALSE},
the transcription direction encountered first in the transcript database (see
\code{transcriptAnno}) is assigned to the mutation. The latter was the
behavior until version 1.3.5 of \code{decompTumor2Sig} and is also the
behavior of \code{pmsignature}. However, it is preferable to exclude
these mutations from the count (default) because from mutation data alone
it cannot be inferred which of the two genes has the higher transcriptional
activity, which might be linked to the occurrence of the mutation.
(If you are unsure, use the default setting; this option exists mostly for
backward compatibility with older versions.)}

\item{refGenome}{(Mandatory) The reference genome (\code{BSgenome}) needed
to extract sequence patterns. Default: \code{BSgenome} object for hg19.}

\item{transcriptAnno}{(Optional) Transcript annotation (\code{TxDb} object)
used to determine the transcription direction. This is required only if
\code{trDir} is \code{TRUE}. Default: \code{TxDb} object for hg19.}

\item{verbose}{(Optional) Print information about reading and processing the
mutation data. Default: \code{TRUE}}
}
\value{
A list containing the genomes in terms of frequencies of the mutated
sequence patterns. This list of genomes can be used for
\code{decomposeTumorGenomes}.
}
\description{
\code{readGenomesFromMPF()} reads somatic mutations of a single tumor genome
(sample) or a set of genomes from an MPF file (Mutation Position Format;
see details below) and determines the mutation frequencies according to
a specific model of mutational signatures (Alexandrov or Shiraishi).
}
\details{
An MPF file has the following format (one line per mutation and
patient/sample):

\code{[sampleID]<tab>[chrom]<tab>[position]<tab>[ref_bases]<tab>[alt_bases]}
}
\examples{

### load reference genome and transcript annotation (if direction is needed)
refGenome <- BSgenome.Hsapiens.UCSC.hg19::BSgenome.Hsapiens.UCSC.hg19
transcriptAnno <-
  TxDb.Hsapiens.UCSC.hg19.knownGene::TxDb.Hsapiens.UCSC.hg19.knownGene

### read breast cancer genomes from Nik-Zainal et al (PMID: 22608084) 
gfile <- system.file("extdata", "Nik-Zainal_PMID_22608084-MPF.txt.gz", 
         package="decompTumor2Sig")
genomes <- readGenomesFromMPF(gfile, numBases=5, type="Shiraishi",
         trDir=TRUE, enforceUniqueTrDir=TRUE, refGenome=refGenome,
         transcriptAnno=transcriptAnno, verbose=FALSE)

}
\references{
\url{http://rmpiro.net/decompTumor2Sig/}\cr
Krueger, Piro (2019) decompTumor2Sig: Identification of mutational
signatures active in individual tumors. BMC Bioinformatics
20(Suppl 4):152.\cr
}
\seealso{
\code{\link{decompTumor2Sig}}\cr
\code{\link{decomposeTumorGenomes}}\cr
\code{\link{readGenomesFromVCF}}\cr
\code{\link{getGenomesFromMutFeatData}}
}
\author{
Rosario M. Piro\cr Politecnico di Milano\cr Maintainer: Rosario
M. Piro\cr E-Mail: <rmpiro@gmail.com> or <rosariomichael.piro@polimi.it>
}
