% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/backbone.R
\docType{methods}
\name{methRead}
\alias{methRead}
\alias{methRead,character,character,character-method}
\alias{methRead,character,ANY,ANY-method}
\alias{methRead,character-method}
\alias{methRead,list,list,character-method}
\alias{methRead,list,ANY,ANY-method}
\title{read file(s) to methylRaw or methylRawList objects}
\usage{
methRead(
  location,
  sample.id,
  assembly,
  dbtype = NA,
  pipeline = "amp",
  header = TRUE,
  skip = 0,
  sep = "\\t",
  context = "CpG",
  resolution = "base",
  treatment = NA,
  dbdir = getwd(),
  mincov = 10
)

\S4method{methRead}{character,character,character}(
  location,
  sample.id,
  assembly,
  dbtype,
  pipeline,
  header,
  skip,
  sep,
  context,
  resolution,
  dbdir,
  mincov
)

\S4method{methRead}{character,ANY,ANY}(
  location,
  sample.id,
  assembly,
  dbtype,
  pipeline,
  header,
  skip,
  sep,
  context,
  resolution,
  dbdir,
  mincov
)

\S4method{methRead}{list,list,character}(
  location,
  sample.id,
  assembly,
  dbtype = NA,
  pipeline = "amp",
  header = TRUE,
  skip = 0,
  sep = "\\t",
  context = "CpG",
  resolution = "base",
  treatment = NA,
  dbdir = getwd(),
  mincov = 10
)

\S4method{methRead}{list,ANY,ANY}(
  location,
  sample.id,
  assembly,
  dbtype = NA,
  pipeline = "amp",
  header = TRUE,
  skip = 0,
  sep = "\\t",
  context = "CpG",
  resolution = "base",
  treatment = NA,
  dbdir = getwd(),
  mincov = 10
)
}
\arguments{
\item{location}{file location(s), either a list of locations (each a 
character string) or one location string}

\item{sample.id}{sample.id(s)}

\item{assembly}{a string that defines the genome assembly such as hg18, mm9. 
this is just a string for book keeping. It can be any string. Although,
when using multiple files from the same assembly, this string should be 
consistent in each object.}

\item{dbtype}{type of the flat file database, currently only option 
other than NA is "tabix". When "tabix" is given the objects are 
stored in tabix files, which are compressed and indexed. 
The default value is NA, in which case the objects are stored in 
memory.}

\item{pipeline}{name of the alignment pipeline, it can be either "amp", 
"bismark","bismarkCoverage", "bismarkCytosineReport" or a list (default:'amp'). 
The methylation text files generated from other pipelines can be 
read as generic methylation text files by supplying a named 
\code{\link[base]{list}} argument as "pipeline" argument.
The named \code{list} should containt column numbers which denotes which 
column of the text file corresponds to values and genomic location of the 
methylation events. See Details for more on possible values for this 
argument.}

\item{header}{if the input file has a header or not (default: TRUE)}

\item{skip}{number of lines to skip when reading. Can be set to 1 for bed 
files with track line (default: 0)}

\item{sep}{seperator between fields, same as \code{\link{read.table}} argument 
(default: "\\t")}

\item{context}{methylation context string, ex: CpG,CHG,CHH, etc. (default:CpG)}

\item{resolution}{designates whether methylation information is base-pair 
resolution or regional resolution. allowed values 'base' or 'region'. 
Default 'base'}

\item{treatment}{a vector contatining 0 and 1 denoting which samples are 
control which samples are test}

\item{dbdir}{directory where flat file database(s) should be stored, defaults
to getwd(), working directory.}

\item{mincov}{minimum read coverage to be included in the methylKit objects.
defaults to 10. Any methylated base/region in the text files
below the mincov value will be ignored.}
}
\value{
returns methylRaw, methylRawList, methylRawDB, methylRawListDB object
}
\description{
The function reads a list of files or single files with methylation 
information for bases/region in the genome and creates a methylrawList or 
methylraw object. 
The information can be stored as flat file database by creating a 
methylrawlistDB or methylrawDB object.
}
\section{Details}{

 The output of \code{methRead} is determined by specific input arguments,as there
  are \code{location}, \code{sample.id}, \code{assembly} and \code{dbtype}. 
 The first three are obligatory, while if the last argument is given database 
 features are enabled. 
 If then \code{location} refers to an uncompressed file the function will 
 create a flat file database and 
 the associated methylRawDB object will link to this database. 
 If then \code{location} refers to an earlier created database file then the 
 object will directly link to this database, 
 skipping the preprocessing steps. 
 
 When \code{pipeline} argument is a list, it is exptected to provide a named 
 list with following names.
 'fraction' is a logical value, denoting if the column frequency of Cs has a 
 range from [0-1] or [0-100]. If true it assumes range is [0-1].
 'chr.col" is the number of the column that has chrosome string.   
 'start.col' is the number of the column that has start coordinate of the 
  base/region of the methylation event.
 'end.col'  is the number of the column that has end coordinate of the 
  base/region of the methylation event.
 'coverage.col' is the number of the column that has read coverage values. 
 'strand.col' is the number of the column that has strand information, the 
 strand information in the file has to be in the form of '+' or '-', 
 'freqC.col' is the number of the column that has the frequency of Cs. 
  See examples to see how to read a generic methylation text file.
  
  Other possible values for  \code{pipeline} argument are 'amp','bismark',
  'bismarkCoverage' and 'bismarkCytosineReport'. For 'amp' and 'bismark' 
   the function expects a tabular format shown in the webpage 
   (http://github.com/al2na/methylKit). 
   "amp" and "bismark" expect identical input and are kept for historical 
   reasons. 'amp' was a pipeline used in Akalin et al. 2012 Plos Genetics 
   paper, publicly available in googlecode. 
   
   Bismark aligner can output methylation information per
   base in
   multiple formats. With \code{pipeline='bismarkCoverage'},  
   the function reads bismark coverage files,
   which have chr,start,end, number of cytosines (methylated bases) 
   and number of thymines (unmethylated bases) format.
   If bismark coverage files are used the function will not have 
   the strand information,so beware of that fact.
   With \code{pipeline='bismarkCytosineReport'}, the function expects 
   cytosine report files from Bismark,
   which have chr,start, strand, number of cytosines (methylated bases) 
   , number of thymines (unmethylated bases),context and trinucletide context
   format. 
   
   The function can also read gzipped files. On unix systems, this is achieved
   by using \code{zcat filename} and feeding that into \code{data.table::fread}
   . On Windows, the file is first uncompressed then read into R using 
   \code{data.table::fread}.
}

\examples{

# this is a list of example files, ships with the package
# for your own analysis you will just need to provide set of paths to files
# you will not need the "system.file(..."  part
file.list=list(
         system.file("extdata", "test1.myCpG.txt", package = "methylKit"),
         system.file("extdata", "test2.myCpG.txt", package = "methylKit"),
         system.file("extdata", "control1.myCpG.txt", package = "methylKit"),
         system.file("extdata", "control2.myCpG.txt", package = "methylKit") 
         )

# read the files to a methylRawList object: myobj
myobj=methRead(file.list,
            sample.id=list("test1","test2","ctrl1","ctrl2"),
            assembly="hg18",treatment=c(1,1,0,0))
            
# read one file as methylRaw object
myobj=methRead( file.list[[1]],
            sample.id="test1",assembly="hg18")
            
# read a generic text file containing CpG methylation values
# let's first look at the content of the file
generic.file=system.file("extdata", "generic1.CpG.txt",package = "methylKit")
read.table(generic.file,header=TRUE)

# And this is how you can read that generic file as a methylKit object            
 myobj=methRead( generic.file,
             pipeline=list(fraction=FALSE,chr.col=1,start.col=2,end.col=2,
                           coverage.col=4,strand.col=3,freqC.col=5),
             sample.id="test1",assembly="hg18")
            
# This creates tabix files that save methylation data
# Without specified dbdir first creates a folder named the following 
# in working directory:
# paste("methylDB",Sys.Date(),paste(sample(c(0:9, letters, LETTERS),3,
# replace=TRUE),collapse=""))
#
# Then, saves tabix files from methylKit objects there
 myobj=methRead( file.list,
               sample.id=list("test1","test2","ctrl1","ctrl2"),
               assembly="hg18",treatment=c(1,1,0,0),
               dbtype="tabix") 

# This creates a single tabix files that saves methylation data
# first creates a "methylDB_objects" directory
# Then, saves tabix file from methylKit objects there
 myobj=methRead(file.list[[1]],
               sample.id="test1",
               assembly="hg18",
               dbtype="tabix",dbdir="methylDB_objects")
           
 # tidy up                  
 rm(myobj)              
 unlink(list.files(pattern = "methylDB",full.names = TRUE),recursive = TRUE)
               
               
 
               
}
