% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/processStudy.R
\encoding{UTF-8}
\name{createStudy2GDS1KG}
\alias{createStudy2GDS1KG}
\title{Create the Profile GDS file(s) for one or multiple specific profiles
using the information from an RDS Sample description file and the 1KG
GDS file}
\usage{
createStudy2GDS1KG(
  pathGeno = file.path("data", "sampleGeno"),
  filePedRDS = NULL,
  pedStudy = NULL,
  fileNameGDS,
  batch = 1,
  studyDF,
  listProfiles = NULL,
  pathProfileGDS = NULL,
  genoSource = c("snp-pileup", "generic", "VCF"),
  verbose = FALSE
)
}
\arguments{
\item{pathGeno}{a \code{character} string representing the path to the
directory containing the VCF output of SNP-pileup for each sample. The
SNP-pileup files must be compressed (gz files) and have the name identifiers
of the samples. A sample with the "Name.ID" identifier would have an
associated file called
"Name.ID.vcf.gz" if genoSource is "VCF",
"Name.ID.generic.txt.gz" if genoSource is "generic", or
"Name.ID.txt.gz" if genoSource is "snp-pileup".}

\item{filePedRDS}{a \code{character} string representing the path to the
RDS file that contains the information about the sample to analyse.
The RDS file must
include a \code{data.frame} with those mandatory columns: "Name.ID",
"Case.ID", "Sample.Type", "Diagnosis", "Source". All columns must be in
\code{character} strings. The \code{data.frame}
must contain the information for all the profiles passed in the
\code{listProfiles} parameter. Only one of \code{filePedRDS} or
\code{pedStudy} can be defined.}

\item{pedStudy}{a \code{data.frame} with those mandatory columns: "Name.ID",
"Case.ID", "Sample.Type", "Diagnosis", "Source". All columns must be in
\code{character} strings (no factor). The \code{data.frame}
must contain the information for all the profiles passed in the
\code{listProfiles} parameter. Only one of \code{filePedRDS} or
\code{pedStudy} can be defined.}

\item{fileNameGDS}{a \code{character} string representing the file name of
the Reference GDS file. The file must exist.}

\item{batch}{a single positive \code{integer} representing the current
identifier for the batch. Beware, this field is not stored anymore.
Default: \code{1}.}

\item{studyDF}{a \code{data.frame} containing the information about the
study associated to the analysed sample(s). The \code{data.frame} must have
those 3 columns: "study.id", "study.desc", "study.platform". All columns
must be in \code{character} strings (no factor).}

\item{listProfiles}{a \code{vector} of \code{character} strings corresponding
to the profile identifiers that will have a Profile GDS file created. The
profile identifiers must be present in the "Name.ID" column of the Profile
RDS file passed to the \code{filePedRDS} parameter (or of the
\code{data.frame} passed to the \code{pedStudy} parameter).
If \code{NULL}, all profiles present in the \code{filePedRDS} are selected.
Default: \code{NULL}.}

\item{pathProfileGDS}{a \code{character} string representing the path to
the directory where the Profile GDS files will be created.
Default: \code{NULL}.}

\item{genoSource}{a \code{character} string with three possible values:
'snp-pileup', 'generic' or 'VCF'. It specifies if the genotype files
are generated by snp-pileup (Facets) or are a generic format CSV file
with at least those columns:
'Chromosome', 'Position', 'Ref', 'Alt', 'Count', 'File1R' and 'File1A'.
The 'Count' is the depth at the specified position;
'File1R' is the depth of the reference allele and
'File1A' is the depth of the specific alternative allele.
Finally the file can be a VCF file with at least those genotype
fields: GT, AD, DP.}

\item{verbose}{a \code{logical} indicating if message information should be
printed. Default: \code{FALSE}.}
}
\value{
The function returns \code{0L} when successful.
}
\description{
The function uses the information from the Reference GDS file
and the RDS Sample Description file to create the Profile GDS file. One
Profile GDS file is created per profile. One Profile GDS file will be
created for each entry present in the \code{listProfiles} parameter.
}
\examples{

## The demo 1KG GDS file is located in this package
dataDir <- system.file("extdata/tests", package="RAIDS")
fileGDS <- file.path(dataDir, "ex1_good_small_1KG.gds")

## The data.frame containing the information about the study
## The 3 mandatory columns: "study.id", "study.desc", "study.platform"
## The entries should be strings, not factors (stringsAsFactors=FALSE)
studyDF <- data.frame(study.id = "MYDATA",
                        study.desc = "Description",
                        study.platform = "PLATFORM",
                        stringsAsFactors = FALSE)

## The data.frame containing the information about the profiles
## The mandatory columns: "Name.ID", "Case.ID", "Sample.Type",
## "Diagnosis", "Source"
## The entries should be strings, not factors (stringsAsFactors=FALSE)
samplePED <- data.frame(Name.ID=c("ex1", "ex2"),
                    Case.ID=c("Patient_h11", "Patient_h12"),
                    Diagnosis=rep("Cancer", 2),
                    Sample.Type=rep("Primary Tumor", 2),
                    Source=rep("Databank B", 2), stringsAsFactors=FALSE)
## Use the profile identifiers as row names
rownames(samplePED) <- samplePED$Name.ID

## Create the Profile GDS file for the profiles in the 'listProfiles' vector
## (in this case, profile "ex1")
## The genotype file for the profile is read from the pathGeno directory
## The Profile GDS file is created in the pathProfileGDS directory
result <- createStudy2GDS1KG(pathGeno=dataDir,
            pedStudy=samplePED, fileNameGDS=fileGDS,
            studyDF=studyDF, listProfiles=c("ex1"),
            pathProfileGDS=tempdir(),
            genoSource="snp-pileup",
            verbose=FALSE)

## The function returns 0L when successful
result

## The Profile GDS file 'ex1.gds' has been created in the
## specified directory
list.files(tempdir())

## Remove Profile GDS file (created for demo purpose)
unlink(file.path(tempdir(), "ex1.gds"), force=TRUE)


}
\author{
Pascal Belleau, Astrid Deschênes and Alexander Krasnitz
}
