### Unit tests for processStudy.R functions

library(RAIDS)
library(withr)
library(gdsfmt)



#############################################################################
### Tests pruningSample() results
#############################################################################


context("pruningSample() results")


test_that("pruningSample() must return error when gdsReference is a character string", {

    dataDir <- test_path("fixtures")

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    sampleRDS <- test_path("fixtures", "Sample_Info_Test.RDS")

    error_message <- "The \'gdsReference\' must be an object of class \'gds.class\'."

    expect_error(pruningSample(gdsReference=fileGDS, method="corr", currentProfile="test",
        studyID="test", listSNP=NULL, slideWindowMaxBP=5e5, thresholdLD=sqrt(0.1),
        np=1, verbose=FALSE, chr=NULL, superPopMinAF=NULL, keepPrunedGDS=FALSE,
        pathProfileGDS=dataDir, keepFile=FALSE, pathPrunedGDS=".",
        outPrefix="pruned"), error_message, fixed=TRUE)
})


test_that("pruningSample() must return error when keepPrunedGDS is a character string", {

    dataDir <- test_path("fixtures")

    fileGDS <- test_path("fixtures",  "1KG_Test.gds")

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    error_message <- 'The \'keepPrunedGDS\' parameter must be a logical (TRUE or FALSE).'

    expect_error(pruningSample(gdsReference=gdsF, method="corr", currentProfile="test",
        studyID="test", listSNP=NULL, slideWindowMaxBP=5e5, thresholdLD=sqrt(0.1),
        np=1, verbose=FALSE, chr=NULL, superPopMinAF=NULL, keepPrunedGDS="YES",
        pathProfileGDS=dataDir, keepFile=FALSE, pathPrunedGDS=".",
        outPrefix="pruned"), error_message, fixed=TRUE)
})


test_that("pruningSample() must return error when keepFile is a character string", {

    dataDir <- test_path("fixtures")

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    sampleRDS <- test_path("fixtures", "Sample_Info_Test.RDS")

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    error_message <- 'The \'keepFile\' parameter must be a logical (TRUE or FALSE).'

    expect_error(pruningSample(gdsReference=gdsF, method="corr", currentProfile="1KG_Test",
        studyID="test", listSNP=NULL, slideWindowMaxBP=5e5, thresholdLD=sqrt(0.1),
        np=1, verbose=FALSE, chr=NULL, superPopMinAF=NULL, keepPrunedGDS=TRUE,
        pathProfileGDS=dataDir, keepFile="NO", pathPrunedGDS=".",
        outPrefix="pruned"), error_message, fixed=TRUE)
})


test_that("pruningSample() must return error when np is a character string", {

    dataDir <- test_path("fixtures")

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    sampleRDS <- test_path("fixtures", "Sample_Info_Test.RDS")

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    error_message <- "The \'np\' parameter must be a single positive numeric value."

    expect_error(pruningSample(gdsReference=gdsF, method="corr", currentProfile="test",
        studyID="test", listSNP=NULL, slideWindowMaxBP=5e5, thresholdLD=sqrt(0.1),
        np="1", verbose=FALSE, chr=NULL, superPopMinAF=NULL, keepPrunedGDS=FALSE,
        pathProfileGDS=dataDir, keepFile=FALSE, pathPrunedGDS=".", outPrefix="pruned"), error_message, fixed=TRUE)
})


test_that("pruningSample() must return error when slideWindowMaxBP is a character string", {

    dataDir <- test_path("fixtures")

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    sampleRDS <- test_path("fixtures", "Sample_Info_Test.RDS")

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    error_message <- "The \'slideWindowMaxBP\' parameter must be a single positive numeric value."

    expect_error(pruningSample(gdsReference=gdsF, method="corr", currentProfile="test",
        studyID="test", listSNP=NULL, slideWindowMaxBP="4", thresholdLD=sqrt(0.1),
        np=1, verbose=FALSE, chr=NULL, superPopMinAF=NULL, keepPrunedGDS=FALSE,
        pathProfileGDS=dataDir, keepFile=FALSE, pathPrunedGDS=".", outPrefix="pruned"), error_message, fixed=TRUE)
})


test_that("pruningSample() must return error when thresholdLD is a character string", {

    dataDir <- test_path("fixtures")

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    sampleRDS <- test_path("fixtures", "Sample_Info_Test.RDS")

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    error_message <- "The \'thresholdLD\' parameter must be a single positive numeric value."

    expect_error(pruningSample(gdsReference=gdsF, method="corr", currentProfile="test",
        studyID="test", listSNP=NULL, slideWindowMaxBP=4, thresholdLD="3",
        np=1, verbose=FALSE, chr=NULL, superPopMinAF=NULL, keepPrunedGDS=FALSE,
        pathProfileGDS=dataDir, keepFile=FALSE, pathPrunedGDS=".", outPrefix="pruned"), error_message, fixed=TRUE)
})


test_that("pruningSample() must return error when thresholdLD is a vector of numerics", {

    dataDir <- test_path("fixtures")

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    sampleRDS <- test_path("fixtures", "Sample_Info_Test.RDS")

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    error_message <- "The \'thresholdLD\' parameter must be a single positive numeric value."

    expect_error(pruningSample(gdsReference=gdsF, method="corr", currentProfile="test",
        studyID="test", listSNP=NULL, slideWindowMaxBP=4, thresholdLD=c(3,3),
        np=1, verbose=FALSE, chr=NULL, superPopMinAF=NULL, keepPrunedGDS=FALSE,
        pathProfileGDS=dataDir, keepFile=FALSE, pathPrunedGDS=".", outPrefix="pruned"), error_message, fixed=TRUE)
})


test_that("pruningSample() must return error when method is a numeric", {

    dataDir <- test_path("fixtures")

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    sampleRDS <- test_path("fixtures", "Sample_Info_Test.RDS")

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())
    error_message <- "The \'method\' parameter must be a character string."

    expect_error(pruningSample(gdsReference=gdsF, method=3, currentProfile="test",
        studyID="test", listSNP=NULL, slideWindowMaxBP=50000L, thresholdLD=sqrt(0.1),
        np=1, verbose=FALSE, chr=NULL, superPopMinAF=NULL, keepPrunedGDS=FALSE,
        pathProfileGDS=dataDir, keepFile=FALSE, pathPrunedGDS=".", outPrefix="pruned"), error_message, fixed=TRUE)
})


test_that("pruningSample() must return error when method is not in the list of choices", {

    dataDir <- test_path("fixtures")

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    sampleRDS <- test_path("fixtures", "Sample_Info_Test.RDS")

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    expect_error(pruningSample(gdsReference=gdsF, method="test", currentProfile="test",
        studyID="test", listSNP=NULL, slideWindowMaxBP=50000L, thresholdLD=sqrt(0.1),
        np=1, verbose=FALSE, chr=NULL, superPopMinAF=NULL, keepPrunedGDS=FALSE,
        pathProfileGDS=dataDir, keepFile=FALSE, pathPrunedGDS=".", outPrefix="pruned"))
})


test_that("pruningSample() must return error when currentProfile is a numeric", {

    dataDir <- test_path("fixtures")

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    sampleRDS <- test_path("fixtures", "Sample_Info_Test.RDS")

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    error_message <- "The \'currentProfile\' parameter must be a character string."

    expect_error(pruningSample(gdsReference=gdsF, method="corr", currentProfile=2,
        studyID="test", listSNP=NULL, slideWindowMaxBP=50000L,
        thresholdLD=sqrt(0.1), np=1, verbose=FALSE, chr=NULL,
        superPopMinAF=NULL, keepPrunedGDS=FALSE, pathProfileGDS=dataDir,
        keepFile=FALSE, pathPrunedGDS=".", outPrefix="pruned"),
        error_message, fixed=TRUE)
})



test_that("pruningSample() must return error when pathPrunedGDS is a numeric", {

    dataDir <- test_path("fixtures")

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    sampleRDS <- test_path("fixtures", "Sample_Info_Test.RDS")

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    error_message <- paste0("The \'pathPrunedGDS\' parameter must be ",
        "a character string representing an existing directory.")

    expect_error(pruningSample(gdsReference=gdsF, method="corr", currentProfile="Sample2",
        studyID="test", listSNP=NULL, slideWindowMaxBP=50000L, thresholdLD=sqrt(0.1),
        np=1L, verbose=FALSE, chr=NULL, superPopMinAF=NULL, keepPrunedGDS=FALSE,
        pathProfileGDS=dataDir, keepFile=FALSE, pathPrunedGDS=2, outPrefix="pruned"), error_message, fixed=TRUE)
})


test_that("pruningSample() must return error when pathProfileGDS is a numeric", {

    dataDir <- test_path("fixtures")

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    sampleRDS <- test_path("fixtures", "Sample_Info_Test.RDS")

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    error_message <- paste0("The \'pathProfileGDS\' parameter must be a ",
                    "character string representing an existing directory.")

    expect_error(pruningSample(gdsReference=gdsF, method="corr", currentProfile="Sample2",
            studyID="test", listSNP=NULL, slideWindowMaxBP=50000L, thresholdLD=sqrt(0.1),
            np=1L, verbose=FALSE, chr=NULL, superPopMinAF=NULL, keepPrunedGDS=FALSE,
            pathProfileGDS=33, keepFile=FALSE, pathPrunedGDS=dataDir, outPrefix="pruned"), error_message, fixed=TRUE)
})


test_that("pruningSample() must return error when pathProfileGDS is a non existing directory", {

    dataDir <- test_path("fixtures")

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    sampleRDS <- test_path("fixtures", "Sample_Info_Test.RDS")

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    error_message <- paste0("The \'pathProfileGDS\' parameter must be a ",
                    "character string representing an existing directory.")

    expect_error(pruningSample(gdsReference=gdsF, method="corr",
        currentProfile="Sample2", studyID="test", listSNP=NULL,
        slideWindowMaxBP=50000L, thresholdLD=sqrt(0.1), np=1L,
        verbose=FALSE, chr=NULL, superPopMinAF=NULL, keepPrunedGDS=FALSE,
        pathProfileGDS=paste0(dataDir, "_NOT_EXISTING_DIRECTORY"),
        keepFile=FALSE, pathPrunedGDS=dataDir, outPrefix="pruned"),
        error_message, fixed=TRUE)
})


test_that("pruningSample() must return error when verbose is a character string", {

    dataDir <- test_path("fixtures")

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    sampleRDS <- test_path("fixtures", "Sample_Info_Test.RDS")

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    error_message <- 'The \'verbose\' parameter must be a logical (TRUE or FALSE).'

    expect_error(pruningSample(gdsReference=gdsF, method="corr",
        currentProfile="Sample2", studyID="test", listSNP=NULL,
        slideWindowMaxBP=50000L, thresholdLD=sqrt(0.1), np=1L, verbose="HI",
        chr=NULL, superPopMinAF=NULL, keepPrunedGDS=FALSE,
        pathProfileGDS=pdataDir, keepFile=FALSE, pathPrunedGDS=dataDir,
        outPrefix="pruned"), error_message, fixed=TRUE)
})


test_that("pruningSample() must return error when GDS Sample file does not exist", {

    dataDir <- test_path("fixtures")

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    sampleRDS <- test_path("fixtures", "Sample_Info_Test.RDS")

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    fileProfileGDS <- file.path(dataDir, "A_File_That_DOES_NOT_EXIST.gds")
    error_message <- paste0("The Profile GDS file \'", fileProfileGDS,
                                    " does not exist.")

    expect_error(pruningSample(gdsReference=gdsF, method="corr",
        currentProfile="A_File_That_DOES_NOT_EXIST",
        studyID="test", listSNP=NULL, slideWindowMaxBP=50000L,
        thresholdLD=sqrt(0.1), np=1L, verbose=FALSE, chr=NULL,
        superPopMinAF=NULL, keepPrunedGDS=FALSE, pathProfileGDS=dataDir,
        keepFile=FALSE, pathPrunedGDS=dataDir, outPrefix="pruned"),
        error_message, fixed=TRUE)
})


test_that("pruningSample() must return error when no SNV left after filtering", {

    dataDir <- test_path("fixtures")

    fileGDS <- test_path("fixtures", "ex1_good_small_1KG_GDS.gds")

    studyDF <- data.frame(study.id="MYDATA", study.desc="Description",
                    study.platform="PLATFORM",
                    stringsAsFactors=FALSE)

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())
    dataDirSample <- test_path("fixtures/sampleGDSforPruning")

    file.copy(file.path(dataDirSample, "ex1_demoForPruning.gds"),
              file.path(dataDirSample, "ex1.gds"))
    withr::defer((unlink(file.path(dataDirSample, "ex1.gds"))),
                 envir=parent.frame())

    error_message <- paste0("In pruningSample, the sample ex1 ",
                                    "doesn't have SNPs after filters")

    expect_error(pruningSample(gdsReference=gdsF, method="corr",
        currentProfile="ex1", studyID=studyDF$study.id,
        listSNP=NULL, slideWindowMaxBP=50000L,
        thresholdLD=sqrt(0.1), np=1L, verbose=FALSE, chr=22,
        superPopMinAF=0.41, keepPrunedGDS=TRUE, pathProfileGDS=dataDirSample,
        keepFile=TRUE, pathPrunedGDS=dataDirSample, outPrefix="prunedTest"),
        error_message, fixed=TRUE)
})




test_that("pruningSample() must return error when the study is not found", {

    dataDir <- test_path("fixtures")

    fileGDS <- test_path("fixtures", "ex1_good_small_1KG_GDS.gds")

    studyDF <- data.frame(study.id="MYDATA", study.desc="Description",
                          study.platform="PLATFORM",
                          stringsAsFactors=FALSE)

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    dataDirSample <- test_path("fixtures/sampleGDSforPruning")

    file.copy(file.path(dataDirSample, "ex1_demoForPruning.gds"),
              file.path(dataDirSample, "ex1.gds"))

    withr::defer((unlink(file.path(dataDirSample, "ex1.gds"))),
                 envir=parent.frame())

    error_message <- paste0("In pruningSample the profile \'ex1\'",
                        " doesn't exists for the study \'demo\'\n")

    expect_error(pruningSample(gdsReference=gdsF, method="corr",
        currentProfile="ex1", studyID="demo",
        listSNP=NULL, slideWindowMaxBP=50000L,
        thresholdLD=sqrt(0.1), np=1L, verbose=FALSE, chr=22,
        superPopMinAF=0.41, keepPrunedGDS=TRUE, pathProfileGDS=dataDirSample,
        keepFile=TRUE, pathPrunedGDS=dataDirSample, outPrefix="prunedTest"),
        error_message, fixed=TRUE)
})


test_that("pruningSample() must return expect result", {

    dataDir <- test_path("fixtures")

    fileGDS <- test_path("fixtures", "ex1_good_small_1KG_GDS.gds")

    studyDF <- data.frame(study.id="MYDATA", study.desc="Description",
                          study.platform="PLATFORM",
                          stringsAsFactors=FALSE)

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    dataDirSample <- test_path("fixtures/sampleGDSforPruning")

    file.copy(file.path(dataDirSample, "ex1_demoForPruning.gds"),
                      file.path(dataDirSample, "ex1.gds"))
    withr::defer((unlink(file.path(dataDirSample, "prunedTest.Obj.rds"))),
                        envir=parent.frame())
    withr::defer((unlink(file.path(dataDirSample, "prunedTest.rds"))),
                        envir=parent.frame())
    withr::defer((unlink(file.path(dataDirSample, "ex1.gds"))),
                 envir=parent.frame())

    result <- pruningSample(gdsReference=gdsF, method="corr",
                currentProfile="ex1", studyID=studyDF$study.id,
                listSNP=NULL, slideWindowMaxBP=50000L,
                thresholdLD=sqrt(0.1), np=1L, verbose=FALSE, chr=NULL,
                superPopMinAF=NULL, keepPrunedGDS=TRUE,
                pathProfileGDS=dataDirSample, keepFile=TRUE,
                pathPrunedGDS=dataDirSample, outPrefix="prunedTest")

    expect_equal(result, 0L)
    expect_true(file.exists(file.path(dataDirSample, "prunedTest.Obj.rds")))
    expect_true(file.exists(file.path(dataDirSample, "prunedTest.rds")))
})


#############################################################################
### Tests add1KG2SampleGDS() results
#############################################################################


context("add1KG2SampleGDS() results")


test_that("add1KG2SampleGDS() must return error when gdsReference is a character string", {

    error_message <- "The \'gdsReference\' must be an object of class \'gds.class\'."

    expect_error(add1KG2SampleGDS(gdsReference="toto.gds", fileProfileGDS="sample.gds",
        currentProfile="sample", studyID="TCGA"), error_message, fixed=TRUE)
})


test_that("add1KG2SampleGDS() must return error when fileProfileGDS is a numeric value", {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    error_message <- paste0("The \'fileProfileGDS\' must be a character ",
            "string representing the GDS Sample file. The file must exist.")

    expect_error(add1KG2SampleGDS(gdsReference=gdsF, fileProfileGDS=33,
        currentProfile="sample", studyID="TCGA"), error_message, fixed=TRUE)
})

test_that("add1KG2SampleGDS() must return error when currentProfile is a numeric value", {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    error_message <- "The \'currentProfile\' must be a character string."

    expect_error(add1KG2SampleGDS(gdsReference=gdsF, fileProfileGDS=fileGDS,
            currentProfile=33, studyID="TCGA"), error_message, fixed=TRUE)
})


test_that("add1KG2SampleGDS() must return error when studyID is a numeric value", {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    error_message <- "The \'studyID\' must be a character string."

    expect_error(add1KG2SampleGDS(gdsReference=gdsF, fileProfileGDS=fileGDS,
        currentProfile="Test", studyID=22), error_message, fixed=TRUE)
})


test_that("add1KG2SampleGDS() must return expect result", {

    dataDir <- test_path("fixtures")

    fileGDS <- test_path("fixtures", "ex1_good_small_1KG_GDS.gds")

    studyDF <- data.frame(study.id="MYDATA", study.desc="Description",
                          study.platform="PLATFORM",
                          stringsAsFactors=FALSE)

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    dataDirSample <- test_path("fixtures/sampleGDSforAddingGenotype")


    file.copy(file.path(dataDirSample, "ex1_demoForAddGenotype.gds"),
              file.path(dataDirSample, "ex1.gds"))
    withr::defer((unlink(file.path(dataDirSample, "ex1.gds"))),
                 envir=parent.frame())

    result <- add1KG2SampleGDS(gdsReference=gdsF,
                fileProfileGDS=file.path(dataDirSample, "ex1.gds"),
                currentProfile=c("ex1"), studyID=studyDF$study.id)

    expect_equal(result, 0L)

    content <- openfn.gds(file.path(dataDirSample, "ex1.gds"))
    withr::defer((gdsfmt::closefn.gds(content)), envir=parent.frame())

    expect_true(gdsfmt::exist.gdsn(content, "pruned.study"))
    expect_true(gdsfmt::exist.gdsn(content, "geno.ref"))
    expect_true(gdsfmt::exist.gdsn(content, "sample.id"))
    expect_true(gdsfmt::exist.gdsn(content, "snp.id"))
    expect_true(gdsfmt::exist.gdsn(content, "snp.position"))
    expect_true(gdsfmt::exist.gdsn(content, "snp.index"))
    expect_true(gdsfmt::exist.gdsn(content, "genotype"))
    expect_true(gdsfmt::exist.gdsn(content, "lap"))
})


#############################################################################
### Tests computePCARefSample() results
#############################################################################


context("computePCARefSample() results")


test_that("computePCARefSample() must return error when gdsProfile isnumeric value", {

    error_message <- "The \'gdsProfile\' must be an object of class \'gds.class\'."

    expect_error(computePCARefSample(gdsProfile="test.gds", currentProfile="test",
                            studyIDRef="Ref.1KG", np=1L, algorithm="exact",
                            eigenCount=32L), error_message, fixed=TRUE)
})


test_that("computePCARefSample() must return error when currentProfile is numeric value", {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    error_message <- "The \'currentProfile\' parameter must be a character string."

    expect_error(computePCARefSample(gdsProfile=gdsF, currentProfile=22,
                            studyIDRef="Ref.1KG", np=1L, algorithm="exact",
                            eigenCount=32L), error_message, fixed=TRUE)
})


test_that("computePCARefSample() must return error when studyIDRef is numeric value", {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    error_message <- "The \'studyIDRef\' parameter must be a character string."

    expect_error(computePCARefSample(gdsProfile=gdsF, currentProfile="Synthetic",
                            studyIDRef=33, np=1L, algorithm="exact",
                            eigenCount=32L), error_message, fixed=TRUE)
})


test_that("computePCARefSample() must return error when np is a character string", {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    error_message <- "The \'np\' parameter must be a single positive integer."

    expect_error(computePCARefSample(gdsProfile=gdsF, currentProfile="TCGA",
                        studyIDRef="Ref.1KG", np="1", algorithm="exact",
                        eigenCount=32L), error_message, fixed=TRUE)
})


test_that("computePCARefSample() must return error when np is a numeric value", {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    error_message <- "The \'algorithm\' parameter must be a character string."

    expect_error(computePCARefSample(gdsProfile=gdsF, currentProfile="TCGA",
                                studyIDRef="Ref.1KG", np=1L, algorithm=33,
                                eigenCount=32L), error_message, fixed=TRUE)
})


test_that("computePCARefSample() must return error when algorithm is a numeric value", {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    error_message <- "The \'algorithm\' parameter must be a character string."

    expect_error(computePCARefSample(gdsProfile=gdsF, currentProfile="TCGA",
            studyIDRef="Ref.1KG", np=1L, algorithm=33,
            eigenCount=32L), error_message, fixed=TRUE)
})


test_that("computePCARefSample() must return error when algorithm is not a valid choice", {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    expect_error(computePCARefSample(gdsProfile=gdsF, currentProfile="TCGA",
            studyIDRef="Ref.1KG", np=1L, algorithm="sun", eigenCount=32L))
})


test_that("computePCARefSample() must return error when eigenCount is a string", {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    error_message <- "The \'eigenCount\' parameter must be a single integer."

    expect_error(computePCARefSample(gdsProfile=gdsF, currentProfile="TCGA",
        studyIDRef="Ref.1KG", np=1L, algorithm="sun", eigenCount="32L"),
        error_message, fixed=TRUE)
})


test_that("computePCARefSample() must return error when missingRate is negative value", {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    error_message <- paste0("The \'missingRate\' must be a single numeric ",
                                "positive value between 0 and 1 or NaN.")

    expect_error(computePCARefSample(gdsProfile=gdsF, currentProfile="TCGA",
        studyIDRef="Ref.1KG", np=1L, algorithm="sun", eigenCount=32L,
        missingRate=-0.02), error_message, fixed=TRUE)
})


test_that("computePCARefSample() must return error when algorithm is not in the list", {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    error_message <- paste0("\'arg\' should be one of \"exact\", ",
                                        "\"randomized\"")

    expect_error(computePCARefSample(gdsProfile=gdsF, currentProfile="TCGA",
        studyIDRef="Ref.1KG", np=1L, algorithm="TITI", eigenCount=32L,
        missingRate=0.02), error_message, fixed=TRUE)
})


test_that("computePCARefSample() must return error when verbose is number", {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")

    gdsF <- openfn.gds(fileGDS)
    withr::defer((closefn.gds(gdsF)), envir=parent.frame())

    error_message <- paste0("The \'verbose\' parameter must be a ",
                                "logical (TRUE or FALSE).")

    expect_error(computePCARefSample(gdsProfile=gdsF, currentProfile="TCGA",
            studyIDRef="Ref.1KG", np=1L, algorithm="sun", eigenCount=32L,
            missingRate=0.02, verbose=33), error_message, fixed=TRUE)
})


test_that("computePCARefSample() must return expected results", {

    pathFile <- test_path("fixtures/sampleGDSforAncestryByFile")

    gdsF <- snpgdsOpen(file.path(pathFile, "ex1.gds"))
    withr::defer((closefn.gds(gdsF)), envir=parent.frame())

    set.seed(121)

    resPCA <- computePCARefSample(gdsProfile=gdsF,
        currentProfile=c("ex1"), studyIDRef="Ref.1KG", np=1L, verbose=FALSE)

    expect_true(is.list(resPCA))
    expect_equal(length(resPCA), 3)
    expect_equal(names(resPCA), c("sample.id", "eigenvector.ref",
                                    "eigenvector"))
    expect_equal(resPCA$sample.id, "ex1")

    expect_true(is.matrix(resPCA$eigenvector.ref))
    expect_equal(rownames(resPCA$eigenvector.ref)[c(1, 5, 8, 22, 44,
        54, 76, 77, 90, 123, 144)], c("HG00243", "HG00138", "HG00275",
        "HG01171", "HG02299", "HG02661", "HG02974", "HG03367", "HG03081",
        "NA18986","NA19663"))
    expect_null(colnames(resPCA$eigenvector.ref))
    expect_equal(resPCA$eigenvector.ref[6, c(1, 3, 4, 6, 7)],
                 c(-0.039769078873965, -0.011391586850322, 0.002447759571484,
                    -0.024845048933754, -0.025926328506198))
    expect_equal(resPCA$eigenvector.ref[44, c(1, 3, 5, 6, 9)],
                 c(-0.020546066486524, 0.010608436969775, -0.019062812032447,
                    -0.028429871761276, 0.005059217676424))

    expect_true(is.matrix(resPCA$eigenvector))
    expect_equal(rownames(resPCA$eigenvector), "ex1")
    expect_null(colnames(resPCA$eigenvector))
    expect_equal(resPCA$eigenvector[1, c(1, 3, 4, 6, 7, 22, 25, 32)],
        c(-0.039179255703460, -0.186164267684954, -0.057606409172989,
            -0.082740705154894, 0.077792400461185, -0.214690314074977,
            -0.280651868285924, -0.329149757549027))
})


#############################################################################
### Tests addStudy1Kg() results
#############################################################################


context("addStudy1Kg() results")


test_that("addStudy1Kg() must return error when gdsReference is a numeric value", {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")

    error_message <- "The \'gdsReference\' must be an object of class \'gds.class\'."

    expect_error(addStudy1Kg(gdsReference=33, fileProfileGDS=fileGDS),
                                error_message, fixed=TRUE)
})


test_that("addStudy1Kg() must return error when fileProfileGDS is a numeric value", {

    ## Create a temporary GDS file in an test directory
    fileGDS <- test_path("fixtures", "GDS_TEMP_processStudy_101.gds")

    ## Create and open a temporary GDS file
    GDS_file_tmp  <- local_GDS_Sample_file(fileGDS)

    error_message <- paste0("The \'fileProfileGDS\' must be a character ",
        "string representing the GDS Sample file. The file must exist.")

    expect_error(addStudy1Kg(gdsReference=GDS_file_tmp, fileProfileGDS=33),
                    error_message, fixed=TRUE)

    ## Close GDS file
    ## The file will automatically be deleted
    closefn.gds(gdsfile=GDS_file_tmp)
})


test_that("addStudy1Kg() must return expected results", {

    ## Create a temporary GDS file in an test directory
    gdsFile1KG <- test_path("fixtures", "GDS_TEMP_processStudy_1KG_102.gds")

    ## Create and open a temporary GDS file 1KG
    GDS_file_tmp_1KG  <- local_GDS_1KG_file(gdsFile1KG, env=parent.frame())

    ## Create and open a temporary GDS Sample file
    gdsFileSample <- test_path("fixtures",
                                "GDS_TEMP_processStudy_Sample_102.gds")
    GDS_file_Sample <- createfn.gds(gdsFileSample)

    study.list <- data.frame(study.id=c("HTT Study"),
                        study.desc=c("Important Study"),
                        study.platform=c("Panel"), stringsAsFactors=FALSE)

    add.gdsn(GDS_file_Sample, "study.list", study.list)

    study.annot <- data.frame(data.id=c("TOTO1"), case.id=c("TOTO1"),
                sample.type=c("Study"), diagnosis=c("Study"),
                source=rep("IGSR"), study.id=c("Study"),
                stringsAsFactors=FALSE)

    add.gdsn(GDS_file_Sample, "study.annot", study.annot)

    sync.gds(GDS_file_Sample)

    closefn.gds(GDS_file_Sample)
    withr::defer((unlink(gdsFileSample, force=TRUE)), envir=parent.frame())

    result0 <- addStudy1Kg(gdsReference=GDS_file_tmp_1KG,
                                fileProfileGDS=gdsFileSample)

    gds_sample_file <- openfn.gds(gdsFileSample, readonly=TRUE)

    result1 <- read.gdsn(index.gdsn(node=gds_sample_file, path="study.list"))
    result2 <- read.gdsn(index.gdsn(node=gds_sample_file, path="study.annot"))

    ## Close GDS file
    ## The file will automatically be deleted
    closefn.gds(gdsfile=GDS_file_tmp_1KG)
    closefn.gds(gdsfile=gds_sample_file)

    expected1 <- data.frame(study.id=c("HTT Study", "Ref.1KG"),
        study.desc=c("Important Study", "Unrelated samples from 1000 Genomes"),
        study.platform=c("Panel", "GRCh38 1000 genotypes"),
        stringsAsFactors=FALSE)

    expected2 <- data.frame(data.id=c("TOTO1", "HTT101", "HTT103"),
                case.id=c("TOTO1", "HTT101", "HTT103"),
                sample.type=c("Study", rep("Reference", 2)),
                diagnosis=c("Study", rep("Reference", 2)),
                source=rep("IGSR", 3), study.id=c("Study", "Ref.1KG", "Ref.1KG"),
                stringsAsFactors=FALSE)

    expect_equal(result0, 0L)
    expect_equal(result1, expected1)
    expect_equal(result2, expected2)
})



test_that("addStudy1Kg() must return expected results when 1KG already present", {

    ## Create a temporary GDS file in an test directory
    gdsFile1KG <- test_path("fixtures", "GDS_TEMP_processStudy_1KG_02.gds")

    ## Create and open a temporary GDS file 1KG
    GDS_file_tmp_1KG  <- local_GDS_1KG_file(gdsFile1KG, env=parent.frame())

    ## Create and open a temporary GDS Sample file
    gdsFileSample <- test_path("fixtures",
                                "GDS_TEMP_processStudy_Sample_02.gds")
    GDS_file_Sample <- createfn.gds(gdsFileSample)

    study.list <- data.frame(study.id=c("Ref.1KG"),
                        study.desc=c("Important Study"),
                        study.platform=c("Panel"), stringsAsFactors=FALSE)

    add.gdsn(GDS_file_Sample, "study.list", study.list)

    study.annot <- data.frame(data.id=c("TOTO1"), case.id=c("TOTO1"),
                             sample.type=c("Study"), diagnosis=c("Study"),
                             source=rep("IGSR"), study.id=c("Ref.1KG"),
                             stringsAsFactors=FALSE)

    add.gdsn(GDS_file_Sample, "study.annot", study.annot)

    sync.gds(GDS_file_Sample)

    closefn.gds(GDS_file_Sample)
    withr::defer((unlink(gdsFileSample)), envir=parent.frame())

    result0 <- addStudy1Kg(gdsReference=GDS_file_tmp_1KG, fileProfileGDS=gdsFileSample)

    gds_sample_file <- openfn.gds(gdsFileSample, readonly=TRUE)

    result1 <- read.gdsn(index.gdsn(node=gds_sample_file, path="study.list"))

    result2 <- read.gdsn(index.gdsn(node=gds_sample_file, path="study.annot"))

    ## Close GDS file
    ## The file will automatically be deleted
    closefn.gds(gdsfile=GDS_file_tmp_1KG)
    closefn.gds(gdsfile=gds_sample_file)

    expected1 <- study.list

    expected2 <- study.annot

    expect_equal(result0, 0L)
    expect_equal(result1, expected1)
    expect_equal(result2, expected2)
})


#############################################################################
### Tests createStudy2GDS1KG() results
#############################################################################

context("createStudy2GDS1KG() results")


test_that(paste0("createStudy2GDS1KG() must return error when filePedRDS is",
            " a numeric value and pedStudy is NULL"), {

    dataDir <- system.file("extdata/tests", package="RAIDS")

    error_message <- paste0("The \'filePedRDS\' must be a character string ",
            "representing the RDS Sample information file. ",
            "The file must exist.")

    expect_error(createStudy2GDS1KG(pathGeno=dataDir,
            filePedRDS=33, pedStudy=NULL, fileNameGDS=NULL,
            batch=1, studyDF=NULL, listProfiles=NULL,
            pathProfileGDS=NULL, genoSource="snp-pileup", verbose=TRUE),
            error_message)
})


test_that("createStudy2GDS1KG() must return error when filePedRDS is NULL and pedStudy is NULL", {

    error_message <- paste0("One of the parameter \'fineNamePED\' of ",
                        "\'pedStudy\' must be defined.")

    expect_error(createStudy2GDS1KG(pathGeno=file.path("data", "sampleGeno"),
                    filePedRDS=NULL, pedStudy=NULL, fileNameGDS=NULL,
                    batch=1, studyDF=NULL, listProfiles=NULL,
                    pathProfileGDS=NULL,
                    genoSource="snp-pileup", verbose=TRUE), error_message)
})


test_that("createStudy2GDS1KG() must return error when pedDF is missing mandatory column", {

    dataDir <- system.file("extdata/tests", package="RAIDS")

    pedDF <- data.frame(Name.ID=c("Sample_01", "Sample_02", "Sample_03"),
                    Case.ID=c("Patient_h11", "Patient_h12", "Patient_h18"),
                    Sample.Type=rep("Primary Tumor", 3),
                    Source=rep("Databank B", 3), stringsAsFactors=FALSE)

    error_message <- paste0("The PED study data frame is incomplete. ",
                            "One or more mandatory columns are missing.")

    expect_error(createStudy2GDS1KG(pathGeno=dataDir,
                    filePedRDS=NULL, pedStudy=pedDF, fileNameGDS=NULL,
                    batch=1, studyDF=NULL, listProfiles=NULL,
                    pathProfileGDS=NULL,
                    genoSource="snp-pileup", verbose=TRUE), error_message)
})


test_that("createStudy2GDS1KG() must return error when fileNameGDS is numerical value", {

    dataDir <- system.file("extdata/tests", package="RAIDS")

    pedDF <- data.frame(Name.ID=c("Sample_01", "Sample_02", "Sample_03"),
                    Case.ID=c("Patient_h11", "Patient_h12", "Patient_h18"),
                    Diagnosis=rep("Cancer", 3),
                    Sample.Type=rep("Primary Tumor", 3),
                    Source=rep("Databank B", 3), stringsAsFactors=FALSE)

    error_message <- paste0("The \'fileNameGDS\' must be a character string ",
        "representing the Population Reference GDS file. The file must exist.")

    expect_error(createStudy2GDS1KG(pathGeno=dataDir,
                        filePedRDS=NULL, pedStudy=pedDF, fileNameGDS=33,
                        batch=1, studyDF=NULL, listProfiles=NULL,
                        pathProfileGDS=NULL,
                        genoSource="snp-pileup", verbose=TRUE), error_message)
})


test_that("createStudy2GDS1KG() must return error when batch is character string", {

    dataDir <- test_path("fixtures")
    fileGDS <- test_path("fixtures", "1KG_Test.gds")

    pedDF <- data.frame(Name.ID=c("Sample_01", "Sample_02", "Sample_03"),
                    Case.ID=c("Patient_h11", "Patient_h12", "Patient_h18"),
                    Diagnosis=rep("Cancer", 3),
                    Sample.Type=rep("Primary Tumor", 3),
                    Source=rep("Databank B", 3), stringsAsFactors=FALSE)

    error_message <- "The \'batch\' must be a single integer."

    expect_error(createStudy2GDS1KG(pathGeno=dataDir,
            filePedRDS=NULL, pedStudy=pedDF, fileNameGDS=fileGDS,
            batch="1", studyDF=NULL, listProfiles=NULL,
            pathProfileGDS=NULL,
            genoSource="snp-pileup", verbose=TRUE), error_message)
})


test_that("createStudy2GDS1KG() must return error when batch is vector of numerics", {

    dataDir <- test_path("fixtures")
    fileGDS <- test_path("fixtures", "1KG_Test.gds")

    pedDF <- data.frame(Name.ID=c("Sample_01", "Sample_02", "Sample_03"),
                    Case.ID=c("Patient_h11", "Patient_h12", "Patient_h18"),
                    Diagnosis=rep("Cancer", 3),
                    Sample.Type=rep("Primary Tumor", 3),
                    Source=rep("Databank B", 3), stringsAsFactors=FALSE)

    error_message <- "The \'batch\' must be a single integer."

    expect_error(createStudy2GDS1KG(pathGeno=dataDir,
                filePedRDS=NULL, pedStudy=pedDF, fileNameGDS=fileGDS,
                batch=c(1,2), studyDF=NULL, listProfiles=NULL,
                pathProfileGDS=NULL,
                genoSource="snp-pileup", verbose=TRUE), error_message)
})


test_that("createStudy2GDS1KG() must return error when listSamples is vector of numerics", {

    dataDir <- system.file("extdata/tests", package="RAIDS")
    fileGDS <- test_path("fixtures", "1KG_Test.gds")

    pedDF <- data.frame(Name.ID=c("Sample_01", "Sample_02", "Sample_03"),
                    Case.ID=c("Patient_h11", "Patient_h12", "Patient_h18"),
                    Diagnosis=rep("Cancer", 3),
                    Sample.Type=rep("Primary Tumor", 3),
                    Source=rep("Databank B", 3), stringsAsFactors=FALSE)

    studyDF <- data.frame(study.id="MYDATA", study.desc="Description",
                          study.platform="PLATFORM", stringsAsFactors=FALSE)

    error_message <- paste0("The \'listProfiles\' must be a vector ",
                        "of character strings (1 entry or more) or NULL.")

    expect_error(createStudy2GDS1KG(pathGeno=dataDir,
            filePedRDS=NULL, pedStudy=pedDF, fileNameGDS=fileGDS,
            batch=1, studyDF=studyDF, listProfiles=c(1,2),
            pathProfileGDS=NULL,
            genoSource="snp-pileup", verbose=TRUE), error_message, fixed=TRUE)
})


test_that("createStudy2GDS1KG() must return error when listProfiles is numeric", {

    dataDir <- test_path("fixtures")
    fileGDS <- test_path("fixtures", "1KG_Test.gds")

    pedDF <- data.frame(Name.ID=c("Sample_01", "Sample_02", "Sample_03"),
                Case.ID=c("Patient_h11", "Patient_h12", "Patient_h18"),
                Diagnosis=rep("Cancer", 3),
                Sample.Type=rep("Primary Tumor", 3),
                Source=rep("Databank B", 3), stringsAsFactors=FALSE)

    studyDF <- data.frame(study.id="MYDATA", study.desc="Description",
                        study.platform="PLATFORM", stringsAsFactors=FALSE)

    error_message <- paste0("The \'listProfiles\' must be a vector ",
                            "of character strings (1 entry or more) or NULL.")

    expect_error(createStudy2GDS1KG(pathGeno=dataDir,
        filePedRDS=NULL, pedStudy=pedDF, fileNameGDS=fileGDS,
        batch=1, studyDF=studyDF, listProfiles=1,
        pathProfileGDS=NULL,
        genoSource="snp-pileup", verbose=TRUE), error_message, fixed=TRUE)
})


test_that("createStudy2GDS1KG() must return error when studyDF is missing column", {

    dataDir <- test_path("fixtures")
    fileGDS <- test_path("fixtures", "1KG_Test.gds")

    pedDF <- data.frame(Name.ID=c("Sample_01", "Sample_02", "Sample_03"),
                    Case.ID=c("Patient_h11", "Patient_h12", "Patient_h18"),
                    Diagnosis=rep("Cancer", 3),
                    Sample.Type=rep("Primary Tumor", 3),
                    Source=rep("Databank B", 3), stringsAsFactors=FALSE)

    studyDF <- data.frame(study.id="MYDATA", study.desc="Description",
                         stringsAsFactors=FALSE)

    error_message <- paste0("The study data frame \'studyDF\' is incomplete. ",
        "One or more mandatory columns are missing. The mandatory ",
        "columns are: \'study.id\', \'study.desc\', \'study.platform\'.")

    expect_error(createStudy2GDS1KG(pathGeno=dataDir,
        filePedRDS=NULL, pedStudy=pedDF, fileNameGDS=fileGDS,
        batch=1, studyDF=studyDF, listProfiles=1,
        pathProfileGDS=NULL,
        genoSource="snp-pileup", verbose=TRUE), error_message, fixed=TRUE)
})


test_that("createStudy2GDS1KG() must return error when verbose is numeric", {

    dataDir <- test_path("fixtures")
    fileGDS <- test_path("fixtures", "1KG_Test.gds")

    pedDF <- data.frame(Name.ID=c("Sample_01", "Sample_02", "Sample_03"),
                    Case.ID=c("Patient_h11", "Patient_h12", "Patient_h18"),
                    Diagnosis=rep("Cancer", 3),
                    Sample.Type=rep("Primary Tumor", 3),
                    Source=rep("Databank B", 3), stringsAsFactors=FALSE)

    studyDF <- data.frame(study.id="MYDATA", study.desc="Description",
                    study.platform="PLATFORM", stringsAsFactors=FALSE)

    error_message <- "The \'verbose\' parameter must be a logical (TRUE or FALSE)."

    expect_error(createStudy2GDS1KG(pathGeno=dataDir,
            filePedRDS=NULL, pedStudy=pedDF, fileNameGDS=fileGDS,
            batch=1, studyDF=studyDF, listProfiles=NULL,
            pathProfileGDS=dataDir,
            genoSource="snp-pileup", verbose=22), error_message, fixed=TRUE)
})

test_that("createStudy2GDS1KG() must return error when the gdsProfile already exists", {

    dataDir <- test_path("fixtures")
    fileGDS <- test_path("fixtures", "1KG_Test.gds")

    pedDF <- data.frame(Name.ID=c("Sample_01", "Sample_02", "Sample_03"),
                        Case.ID=c("Patient_h11", "Patient_h12", "Patient_h18"),
                        Diagnosis=rep("Cancer", 3),
                        Sample.Type=rep("Primary Tumor", 3),
                        Source=rep("Databank B", 3), stringsAsFactors=FALSE)

    studyDF <- data.frame(study.id="MYDATA", study.desc="Description",
                          study.platform="PLATFORM", stringsAsFactors=FALSE)

    error_message <- paste0("The gds file for ", "GDS_Sample_with_study_demo", " already exist.")

    expect_error(createStudy2GDS1KG(pathGeno=dataDir,
                                    filePedRDS=NULL, pedStudy=pedDF, fileNameGDS=fileGDS,
                                    batch=1, studyDF=studyDF, listProfiles=c("GDS_Sample_with_study_demo"),
                                    pathProfileGDS=dataDir,
                                    genoSource="snp-pileup", verbose=FALSE), error_message, fixed=TRUE)
})


test_that("createStudy2GDS1KG() must return error when pathProfileGDS is numeric", {

    dataDir <- test_path("fixtures")
    gdsFile <- test_path("fixtures", "1KG_Test.gds")

    pedDF <- data.frame(Name.ID=c("Sample_01", "Sample_02", "Sample_03"),
                Case.ID=c("Patient_h11", "Patient_h12", "Patient_h18"),
                Diagnosis=rep("Cancer", 3),
                Sample.Type=rep("Primary Tumor", 3),
                Source=rep("Databank B", 3), stringsAsFactors=FALSE)

    studyDF <- data.frame(study.id="MYDATA", study.desc="Description",
                study.platform="PLATFORM", stringsAsFactors=FALSE)

    error_message <- paste0("The \'pathProfileGDS\' must be a character ",
        "string representing the path where the Profile GDS files ",
        "will be generated.")

    expect_error(createStudy2GDS1KG(pathGeno=dataDir,
        filePedRDS=NULL, pedStudy=pedDF, fileNameGDS=gdsFile,
        batch=1, studyDF=studyDF, listProfiles=NULL,
        pathProfileGDS=33,
        genoSource="snp-pileup", verbose=FALSE), error_message, fixed=TRUE)
})


test_that("createStudy2GDS1KG() must return error when both filePedRDS and pedStudy are defined", {

    dataDir <- system.file("extdata/tests", package="RAIDS")
    fileGDS <- file.path(dataDir, "1KG_Test.gds")

    pedDF <- data.frame(Name.ID=c("Sample_01", "Sample_02", "Sample_03"),
                        Case.ID=c("Patient_h11", "Patient_h12", "Patient_h18"),
                        Diagnosis=rep("Cancer", 3),
                        Sample.Type=rep("Primary Tumor", 3),
                        Source=rep("Databank B", 3), stringsAsFactors=FALSE)

    studyDF <- data.frame(study.id="MYDATA", study.desc="Description",
                    study.platform="PLATFORM", stringsAsFactors=FALSE)

    error_message <- paste0("Both \'filePedRDS\' and \'pedStudy\' parameters ",
                        "cannot be defined at the same time.")

    expect_error(createStudy2GDS1KG(pathGeno=file.path("data", "sampleGeno"),
        filePedRDS=fileGDS, pedStudy=pedDF, fileNameGDS=fileGDS,
        batch=1, studyDF=studyDF, listProfiles=NULL,
        pathProfileGDS=dataDir,
        genoSource="snp-pileup", verbose=TRUE), error_message, fixed=TRUE)
})


test_that("createStudy2GDS1KG() must return error when genoSource not in list of choices", {

    dataDir <- system.file("extdata/tests", package="RAIDS")
    fileGDS <- file.path(dataDir, "1KG_Test.gds")

    pedDF <- data.frame(Name.ID=c("Sample_01", "Sample_02", "Sample_03"),
                        Case.ID=c("Patient_h11", "Patient_h12", "Patient_h18"),
                        Diagnosis=rep("Cancer", 3),
                        Sample.Type=rep("Primary Tumor", 3),
                        Source=rep("Databank B", 3), stringsAsFactors=FALSE)

    studyDF <- data.frame(study.id="MYDATA", study.desc="Description",
                          study.platform="PLATFORM", stringsAsFactors=FALSE)

    expect_error(createStudy2GDS1KG(pathGeno=dataDir,
        filePedRDS=NULL, pedStudy=pedDF, fileNameGDS=fileGDS,
        batch=1, studyDF=studyDF, listProfiles=NULL,
        pathProfileGDS=dataDir, genoSource="snp-CANADA", verbose=TRUE))
})


test_that("createStudy2GDS1KG() must return expected results when all parameters ok", {

    dataDir <- test_path("fixtures")
    fileGDS <- file.path(dataDir, "ex1_good_small_1KG_GDS.gds")

    withr::defer((unlink(file.path(dataDir, "ex1.gds"))), envir=parent.frame())


    pedDF <- data.frame(Name.ID=c("ex1", "ex2", "ex3"),
                Case.ID=c("Patient_h11", "Patient_h12", "Patient_h18"),
                Diagnosis=rep("Cancer", 3),
                Sample.Type=rep("Primary Tumor", 3),
                Source=rep("Databank B", 3), stringsAsFactors=FALSE)
    rownames(pedDF) <- pedDF$Name.ID

    studyDF <- data.frame(study.id="MYDATA",
                            study.desc="Description",
                            study.platform="PLATFORM",
                            stringsAsFactors=FALSE)

    result <- createStudy2GDS1KG(pathGeno=dataDir,
                pedStudy=pedDF, fileNameGDS=fileGDS,
                batch=1, studyDF=studyDF, listProfiles=c("ex1"),
                pathProfileGDS=dataDir,
                genoSource="snp-pileup", verbose=FALSE)

    expect_true(file.exists(file.path(dataDir, "ex1.gds")))
    expect_equal(result, 0L)
})


#############################################################################
### Tests computePoolSyntheticAncestryGr() results
#############################################################################

context("computePoolSyntheticAncestryGr() results")


test_that("computePoolSyntheticAncestryGr() must return error when gdsProfile is character string", {

    error_message <- "The \'gdsProfile\' must be an object of class \'gds.class\'"

    spRef <- c("EUR", "AFR")
    names(spRef) <- c("HG01", "HG02")

    expect_error(computePoolSyntheticAncestryGr(gdsProfile="test.gds",
                sampleRM=c("1", "2"), studyIDSyn="Synthetic", np=1L,
                spRef=spRef, eigenCount=15L), error_message)
})


test_that("computePoolSyntheticAncestryGr() must return error when sampleRM is vector of numeric values", {

    fileGDS <- test_path("fixtures", "GDS_Sample_with_study_demo.gds")
    gdsSample <- openfn.gds(fileGDS)
    withr::defer(closefn.gds(gdsSample), envir=parent.frame())

    error_message <- "The \'sampleRM\' parameter must be a vector of character strings."

    spRef <- c("EUR", "AFR")
    names(spRef) <- c("HG01", "HG02")

    expect_error(computePoolSyntheticAncestryGr(gdsProfile=gdsSample,
            sampleRM=c(1, 2), studyIDSyn="Synthetic", np=1L,
            spRef=spRef, algorithm="exact", eigenCount=32L), error_message)
})


test_that("computePoolSyntheticAncestryGr() must return error when studyIDSyn is numeric value", {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    gdsSample <- openfn.gds(fileGDS)
    withr::defer(closefn.gds(gdsSample), envir=parent.frame())

    spRef <- c("EUR", "AFR")
    names(spRef) <- c("HG01", "HG02")

    error_message <- "The \'studyIDSyn\' parameter must be a character string."

    expect_error(computePoolSyntheticAncestryGr(gdsProfile=gdsSample,
        sampleRM=c("Sample01", "Sample02"), studyIDSyn=11, np=1L,
        spRef=spRef, algorithm="exact", eigenCount=32L), error_message)
})


test_that("computePoolSyntheticAncestryGr() must return error when listCatPop is numeric value", {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    gdsSample <- openfn.gds(fileGDS)
    withr::defer(closefn.gds(gdsSample), envir=parent.frame())

    spRef <- c("EUR", "AFR")
    names(spRef) <- c("HG01", "HG02")

    error_message <- paste0("The \'listCatPop\' parameter must be a vector of ",
                                "character strings.")

    expect_error(computePoolSyntheticAncestryGr(gdsProfile=gdsSample,
        sampleRM=c("Sample01", "Sample02"), studyIDSyn="Test", np=1L,
        listCatPop=11,
        spRef=spRef, algorithm="exact", eigenCount=32L), error_message)
})


test_that("computePoolSyntheticAncestryGr() must return error when np is character string", {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    gdsSample <- openfn.gds(fileGDS)
    withr::defer(closefn.gds(gdsSample), envir=parent.frame())

    spRef <- c("EUR", "AFR")
    names(spRef) <- c("HG01", "HG02")

    error_message <- "The \'np\' parameter must be a single positive integer."

    expect_error(computePoolSyntheticAncestryGr(gdsProfile=gdsSample,
                sampleRM=c("1", "2"), studyIDSyn="Synthetic", np="12",
                spRef=spRef, eigenCount=15L), error_message)
})


test_that("computePoolSyntheticAncestryGr() must return error when np is zero", {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    gdsSample <- openfn.gds(fileGDS)
    withr::defer(closefn.gds(gdsSample), envir=parent.frame())

    spRef <- c("EUR", "AFR")
    names(spRef) <- c("HG01", "HG02")

    error_message <- "The \'np\' parameter must be a single positive integer."

    expect_error(computePoolSyntheticAncestryGr(gdsProfile=gdsSample,
                    sampleRM=c("1", "2"), studyIDSyn="Synthetic", np=0L,
                    spRef=spRef, eigenCount=15L), error_message)
})


test_that("computePoolSyntheticAncestryGr() must return error when kList is a vector with zero", {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    gdsSample <- openfn.gds(fileGDS)
    withr::defer(closefn.gds(gdsSample), envir=parent.frame())

    spRef <- c("EUR", "AFR")
    names(spRef) <- c("HG01", "HG02")

    error_message <- paste0("The \'kList\' parameter must be a vector of ",
                                "positive integers.")

    expect_error(computePoolSyntheticAncestryGr(gdsProfile=gdsSample,
        sampleRM=c("1", "2"), studyIDSyn="Synthetic", np=1L, kList=c(0, 1),
        pcaList=c(1, 2), spRef=spRef, eigenCount=15L), error_message)
})


test_that("computePoolSyntheticAncestryGr() must return error when pcaList is a vector with zero", {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    gdsSample <- openfn.gds(fileGDS)
    withr::defer(closefn.gds(gdsSample), envir=parent.frame())

    spRef <- c("EUR", "AFR")
    names(spRef) <- c("HG01", "HG02")

    error_message <- paste0("The \'pcaList\' parameter must be a ",
                                "vector of positive integers.")

    expect_error(computePoolSyntheticAncestryGr(gdsProfile=gdsSample,
        sampleRM=c("1", "2"), studyIDSyn="Synthetic", np=1L,
        pcaList=c(0, 1, 2), spRef=spRef, eigenCount=15L), error_message)
})


test_that("computePoolSyntheticAncestryGr() must return error when algorithm is zero", {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    gdsSample <- openfn.gds(fileGDS)
    withr::defer(closefn.gds(gdsSample), envir=parent.frame())

    spRef <- c("EUR", "AFR")
    names(spRef) <- c("HG01", "HG02")

    error_message <- "The \'algorithm\' parameter must be a character string."

    expect_error(computePoolSyntheticAncestryGr(gdsProfile=gdsSample,
                    sampleRM=c("1", "2"), studyIDSyn="Synthetic", np=1L,
                    spRef=spRef, algorithm=22, eigenCount=15L), error_message)
})


test_that("computePoolSyntheticAncestryGr() must return error when algorithm is not in the list of choices", {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    gdsSample <- openfn.gds(fileGDS)
    withr::defer(closefn.gds(gdsSample), envir=parent.frame())

    spRef <- c("EUR", "AFR")
    names(spRef) <- c("HG01", "HG02")

    expect_error(computePoolSyntheticAncestryGr(gdsProfile=gdsSample,
                sampleRM=c("1", "2"), studyIDSyn="Synthetic", np=1L,
                spRef=spRef, algorithm="Hello", eigenCount=15L))
})


test_that("computePoolSyntheticAncestryGr() must return error when eigenCount is character string", {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    gdsSample <- openfn.gds(fileGDS)
    withr::defer(closefn.gds(gdsSample), envir=parent.frame())

    spRef <- c("EUR", "AFR")
    names(spRef) <- c("HG01", "HG02")

    error_message <- "The \'eigenCount\' parameter must be a single integer."

    expect_error(computePoolSyntheticAncestryGr(gdsProfile=gdsSample,
                sampleRM=c("1", "2"), studyIDSyn="Synthetic", np=1L,
                spRef=spRef, algorithm="exact", eigenCount="15"), error_message)
})


test_that("computePoolSyntheticAncestryGr() must return error when missingRate is character string", {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    gdsSample <- openfn.gds(fileGDS)
    withr::defer(closefn.gds(gdsSample), envir=parent.frame())

    spRef <- c("EUR", "AFR")
    names(spRef) <- c("HG01", "HG02")

    error_message <- paste0("The \'missingRate\' parameter must be a single ",
                            "positive numeric between zero and one or NaN.")

    expect_error(computePoolSyntheticAncestryGr(gdsProfile=gdsSample,
        sampleRM=c("1", "2"), studyIDSyn="Synthetic", np=1L, spRef=spRef,
        algorithm="exact", eigenCount=15L, missingRate="0.02"), error_message)
})


test_that("computePoolSyntheticAncestryGr() must return error when verbose is character string", {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    gdsSample <- openfn.gds(fileGDS)
    withr::defer(closefn.gds(gdsSample), envir=parent.frame())

    spRef <- c("EUR", "AFR")
    names(spRef) <- c("HG01", "HG02")

    error_message <- paste0("The \'verbose\' parameter must be a ",
                                "logical (TRUE or FALSE).")

    expect_error(computePoolSyntheticAncestryGr(gdsProfile=gdsSample,
        sampleRM=c("1", "2"), studyIDSyn="Synthetic", np=1L, spRef=spRef,
        algorithm="exact", eigenCount=15L, missingRate=0.02, verbose="QC"),
        error_message, fixed=TRUE)
})


test_that("computePoolSyntheticAncestryGr() must return error when spRef is character string", {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    gdsSample <- openfn.gds(fileGDS)
    withr::defer(closefn.gds(gdsSample), envir=parent.frame())


    error_message <- paste0("The \'spRef\' parameter must be a vector of ",
                    "character strings with profile identifiers as names.")

    expect_error(computePoolSyntheticAncestryGr(gdsProfile=gdsSample,
        sampleRM=c("1", "2"), studyIDSyn="Synthetic", np=1L, spRef="HI",
        algorithm="exact", eigenCount=15L, missingRate=0.02, verbose=TRUE),
        error_message, fixed=TRUE)
})


test_that("computePoolSyntheticAncestryGr() must return error when fieldPopInfAnc is numeric", {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    gdsSample <- openfn.gds(fileGDS)
    withr::defer(closefn.gds(gdsSample), envir=parent.frame())

    spRef <- c("EUR", "AFR")
    names(spRef) <- c("HG01", "HG02")

    error_message <- paste0("The \'fieldPopInfAnc\' parameter must be a ",
                                "character string.")

    expect_error(computePoolSyntheticAncestryGr(gdsProfile=gdsSample,
        sampleRM=c("1", "2"), studyIDSyn="Synthetic", np=1L, spRef=spRef,
        fieldPopInfAnc=33, algorithm="exact", eigenCount=15L, missingRate=0.02,
        verbose=TRUE), error_message, fixed=TRUE)
})


test_that("computePoolSyntheticAncestryGr() must return expected results when Klist and pcaList NULL", {

    fileGDS <- test_path("fixtures/sampleGDSforPoolSyntheticAncestry", "ex1.gds")
    gdsSample <- snpgdsOpen(fileGDS)
    withr::defer(closefn.gds(gdsSample), envir=parent.frame())

    # The name of the synthetic study
    studyID <- "MYDATA.Synthetic"

    samplesRM <- c("HG00246", "HG00325", "HG00611", "HG01173", "HG02165",
        "HG01112", "HG01615", "HG01968", "HG02658", "HG01850", "HG02013",
        "HG02465", "HG02974", "HG03814", "HG03445", "HG03689", "HG03789",
        "NA12751", "NA19107", "NA18548", "NA19075", "NA19475", "NA19712",
        "NA19731", "NA20528", "NA20908")
    names(samplesRM) <- c("GBR", "FIN", "CHS","PUR", "CDX", "CLM", "IBS",
        "PEL", "PJL", "KHV", "ACB", "GWD", "ESN", "BEB", "MSL", "STU", "ITU",
        "CEU", "YRI", "CHB", "JPT", "LWK", "ASW", "MXL", "TSI", "GIH")

    refAncestry <- test_path("fixtures/sampleGDSforPoolSyntheticAncestry",
                                    "knownSuperPop1KG.RDS")
    refKnownSuperPop <- readRDS(refAncestry)

    set.seed(121)
    results <- computePoolSyntheticAncestryGr(gdsProfile=gdsSample,
        sampleRM=samplesRM, studyIDSyn=studyID, np=1L, spRef=refKnownSuperPop,
        listCatPop=c("EAS", "EUR", "AFR", "AMR", "SAS"),
        fieldPopInfAnc="SuperPop", algorithm="exact",
        kList=NULL, pcaList=NULL, eigenCount=15L, missingRate=0.02,
        verbose=FALSE)

    expect_equal(nrow(results$matKNN), 5096)
    expect_equal(ncol(results$matKNN), 4)
    expect_equal(colnames(results$matKNN), c("sample.id", "D", "K", "SuperPop"))
    expect_equal(unique(results$matKNN$D), seq(2, 15, 1))
    expect_equal(unique(results$matKNN$K), seq(2, 15, 1))
    expect_equal(unique(results$matKNN$SuperPop),
                 c("SAS", "EAS", "EUR", "AMR", "AFR"))

    expect_equal(results$sample.id, c( "1.ex1.HG00246.1", "1.ex1.HG00325.1",
        "1.ex1.HG00611.1", "1.ex1.HG01173.1", "1.ex1.HG02165.1",
        "1.ex1.HG01112.1", "1.ex1.HG01615.1", "1.ex1.HG01968.1",
        "1.ex1.HG02658.1", "1.ex1.HG01850.1", "1.ex1.HG02013.1",
        "1.ex1.HG02465.1", "1.ex1.HG02974.1", "1.ex1.HG03814.1",
        "1.ex1.HG03445.1", "1.ex1.HG03689.1", "1.ex1.HG03789.1",
        "1.ex1.NA12751.1", "1.ex1.NA19107.1", "1.ex1.NA18548.1",
        "1.ex1.NA19075.1", "1.ex1.NA19475.1", "1.ex1.NA19712.1",
        "1.ex1.NA19731.1", "1.ex1.NA20528.1", "1.ex1.NA20908.1"))

    expect_equal(results$sample1Kg, c("HG00246", "HG00325", "HG00611",
        "HG01173", "HG02165", "HG01112", "HG01615", "HG01968", "HG02658",
        "HG01850", "HG02013", "HG02465", "HG02974", "HG03814", "HG03445",
        "HG03689", "HG03789", "NA12751", "NA19107", "NA18548", "NA19075",
        "NA19475", "NA19712", "NA19731", "NA20528", "NA20908"))

    expect_equal(results$sp, c("HG00246"="EUR", "HG00325"="EUR", "HG00611"="EAS",
        "HG01173"="AMR", "HG02165"="EAS", "HG01112"="AMR", "HG01615"="EUR",
        "HG01968"="AMR", "HG02658"="SAS", "HG01850"="EAS", "HG02013"="AFR",
        "HG02465"="AFR", "HG02974"="AFR", "HG03814"="SAS", "HG03445"="AFR",
        "HG03689"="SAS", "HG03789"="SAS", "NA12751"="EUR", "NA19107"="AFR",
        "NA18548"="EAS", "NA19075"="EAS", "NA19475"="AFR",
        "NA19712"="AFR", "NA19731"="AMR", "NA20528"="EUR", "NA20908"="SAS"))
})


#############################################################################
### Tests computeAncestryFromSyntheticFile() results
#############################################################################


context("computeAncestryFromSyntheticFile() results")


test_that("computeAncestryFromSyntheticFile() must return error when gdsReference is character string", {

    fileGDS <- test_path("fixtures", "GDS_Sample_with_study_demo.gds")
    gdsSample <- openfn.gds(fileGDS)
    withr::defer(closefn.gds(gdsSample), envir=parent.frame())

    error_message <- "The \'gdsReference\' must be an object of class \'gds.class\'"

    expect_error(computeAncestryFromSyntheticFile(gdsReference="test.gds",
        gdsProfile=fileGDS, listFiles, currentProfile="test",
        spRef=c("EUR", "AFR"), studyIDSyn, np=1L,
        listCatPop=c("EAS", "EUR", "AFR", "AMR", "SAS"),
        fieldPopIn1KG="superPop", fieldPopInfAnc="SuperPop",
        kList=seq(2, 15, 1), pcaList=seq(2, 15, 1), algorithm="exact",
        eigenCount=32L,  missingRate=NaN), error_message)
})


test_that(paste0("computeAncestryFromSyntheticFile() must return error when gdsProfile is character string"), {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    gdsF <- openfn.gds(fileGDS)
    withr::defer(closefn.gds(gdsF), envir=parent.frame())

    error_message <- "The \'gdsProfile\' must be an object of class \'gds.class\'"

    expect_error(computeAncestryFromSyntheticFile(gdsReference=gdsF,
        gdsProfile="sample.gds", listFiles, currentProfile="test",
        spRef, studyIDSyn, np=1L, listCatPop=c("EAS", "EUR", "AFR", "AMR", "SAS"),
        fieldPopIn1KG="superPop", fieldPopInfAnc="SuperPop",
        kList=seq(2, 15, 1), pcaList=seq(2, 15, 1), algorithm="exact",
        eigenCount=32L,  missingRate=NaN), error_message)
})


test_that(paste0("computeAncestryFromSyntheticFile() must return error when currentProfile is numeric"), {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    gdsF <- openfn.gds(fileGDS)
    withr::defer(closefn.gds(gdsF), envir=parent.frame())

    error_message <- "The \'currentProfile\' parameter must be a character string."

    expect_error(computeAncestryFromSyntheticFile(gdsReference=gdsF,
        gdsProfile=gdsF, listFiles, currentProfile=33,
        spRef=c("EUR", "AFR"), studyIDSyn, np=1L,
        listCatPop=c("EAS", "EUR", "AFR", "AMR", "SAS"),
        fieldPopIn1KG="superPop", fieldPopInfAnc="SuperPop",
        kList=seq(2, 15, 1), pcaList=seq(2, 15, 1), algorithm="exact",
        eigenCount=32L,  missingRate=NaN), error_message)
})


test_that(paste0("computeAncestryFromSyntheticFile() must return error when spRef is numeric"), {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    gdsF <- openfn.gds(fileGDS)
    withr::defer(closefn.gds(gdsF), envir=parent.frame())

    error_message <- "The \'spRef\' parameter must be a vector of character strings."

    expect_error(computeAncestryFromSyntheticFile(gdsReference=gdsF,
        gdsProfile=gdsF, listFiles, currentProfile="test",
        spRef=33, studyIDSyn, np=1L, listCatPop=c("EAS", "EUR", "AFR", "AMR", "SAS"),
        fieldPopIn1KG="superPop", fieldPopInfAnc="SuperPop",
        kList=seq(2, 15, 1), pcaList=seq(2, 15, 1), algorithm="exact",
        eigenCount=32L,  missingRate=NaN), error_message)
})


test_that(paste0("computeAncestryFromSyntheticFile() must return error when studyIDSyn is integer"), {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    gdsF <- openfn.gds(fileGDS)
    withr::defer(closefn.gds(gdsF), envir=parent.frame())

    error_message <- "The \'studyIDSyn\' parameter must be a character string."

    expect_error(computeAncestryFromSyntheticFile(gdsReference=gdsF, gdsProfile=gdsF,
        listFiles=fileGDS, currentProfile="sample01", spRef=c("EUR", "AFR"),
        studyIDSyn=12L, np=1L, listCatPop=c("EAS", "EUR", "AFR"),
        fieldPopIn1KG="superPop", fieldPopInfAnc="SuperPop",
        kList=seq(2, 15, 1), pcaList=seq(2, 15, 1), algorithm="exact",
        eigenCount=32L,  missingRate=NaN), error_message)
})


test_that(paste0("computeAncestryFromSyntheticFile() must return error when algorithm is numeric"), {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    gdsF <- openfn.gds(fileGDS)
    withr::defer(closefn.gds(gdsF), envir=parent.frame())

    error_message <- "The \'algorithm\' parameter must be a character string."

    expect_error(computeAncestryFromSyntheticFile(gdsReference=gdsF, gdsProfile=gdsF,
        listFiles=fileGDS, currentProfile="sample01", spRef=c("EUR", "AFR"),
        studyIDSyn="Synthetic", np=1L, listCatPop=c("EAS", "EUR", "AFR"),
        fieldPopIn1KG="superPop", fieldPopInfAnc="SuperPop",
        kList=seq(2, 15, 1), pcaList=seq(2, 15, 1), algorithm=23,
        eigenCount=32L,  missingRate=0.2), error_message)
})


test_that(paste0("computeAncestryFromSyntheticFile() must return error when np is negative"), {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    gdsF <- openfn.gds(fileGDS)
    withr::defer(closefn.gds(gdsF), envir=parent.frame())

    error_message <- "The \'np\' parameter must be a single positive integer."

    expect_error(computeAncestryFromSyntheticFile(gdsReference=gdsF, gdsProfile=gdsF,
        listFiles=fileGDS, currentProfile="sample01", spRef=c("EUR", "AFR"),
        studyIDSyn="Synthetic", np=-1L, listCatPop=c("EAS", "EUR", "AFR"),
        fieldPopIn1KG="superPop", fieldPopInfAnc="SuperPop",
        kList=seq(2, 15, 1), pcaList=seq(2, 15, 1), algorithm="exact",
        eigenCount=32L,  missingRate=0.2), error_message)
})


test_that(paste0("computeAncestryFromSyntheticFile() must return error when listCatPop is numeric"), {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    gdsF <- openfn.gds(fileGDS)
    withr::defer(closefn.gds(gdsF), envir=parent.frame())

    error_message <- paste0("The \'listCatPop\' parameter must be a vector of ",
                                    "character strings.")

    expect_error(computeAncestryFromSyntheticFile(gdsReference=gdsF, gdsProfile=gdsF,
        listFiles=fileGDS, currentProfile="sample01", spRef=c("EUR", "AFR"),
        studyIDSyn="Synthetic", np=1L, listCatPop=c(1, 2, 3),
        fieldPopIn1KG="superPop", fieldPopInfAnc="SuperPop",
        kList=seq(2, 15, 1), pcaList=seq(2, 15, 1), algorithm="exact",
        eigenCount=32L,  missingRate=0.2), error_message)
})


test_that(paste0("computeAncestryFromSyntheticFile() must return error when missingRate is negative"), {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    gdsF <- openfn.gds(fileGDS)
    withr::defer(closefn.gds(gdsF), envir=parent.frame())

    error_message <- paste0("The \'missingRate\' must be a single ",
                        "numeric positive value between 0 and 1 or NaN.")

    expect_error(computeAncestryFromSyntheticFile(gdsReference=gdsF, gdsProfile=gdsF,
        listFiles=fileGDS, currentProfile="sample01", spRef=c("EUR", "AFR"),
        studyIDSyn="Synthetic", np=1L, listCatPop=c("EAS", "EUR", "AFR"),
        fieldPopIn1KG="superPop", fieldPopInfAnc="SuperPop",
        kList=seq(2, 15, 1), pcaList=seq(2, 15, 1), algorithm="exact",
        eigenCount=32L,  missingRate=-0.2), error_message)
})


test_that(paste0("computeAncestryFromSyntheticFile() must return error when eigenCount is a vector of integers"), {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    gdsF <- openfn.gds(fileGDS)
    withr::defer(closefn.gds(gdsF), envir=parent.frame())

    error_message <- "The \'eigenCount\' parameter must be a single integer."

    expect_error(computeAncestryFromSyntheticFile(gdsReference=gdsF, gdsProfile=gdsF,
        listFiles=fileGDS, currentProfile="sample01", spRef=c("EUR", "AFR"),
        studyIDSyn="Synthetic", np=1L, listCatPop=c("EAS", "EUR", "AFR"),
        fieldPopIn1KG="superPop", fieldPopInfAnc="SuperPop",
        kList=seq(2, 15, 1), pcaList=seq(2, 15, 1), algorithm="exact",
        eigenCount=c(2L, 3L),  missingRate=0.2), error_message)
})


test_that(paste0("computeAncestryFromSyntheticFile() must return error when fieldPopIn1KG is numeric"), {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    gdsF <- openfn.gds(fileGDS)
    withr::defer(closefn.gds(gdsF), envir=parent.frame())

    error_message <- paste0("The \'fieldPopIn1KG\' parameter must be a ",
                                "character string.")

    expect_error(computeAncestryFromSyntheticFile(gdsReference=gdsF, gdsProfile=gdsF,
        listFiles=fileGDS, currentProfile="sample01", spRef=c("EUR", "AFR"),
        studyIDSyn="Synthetic", np=1L, listCatPop=c("EAS", "EUR", "AFR"),
        fieldPopIn1KG=22, fieldPopInfAnc="SuperPop",
        kList=seq(2, 15, 1), pcaList=seq(2, 15, 1), algorithm="exact",
        eigenCount=32L,  missingRate=0.2), error_message)
})


test_that(paste0("computeAncestryFromSyntheticFile() must return error when fieldPopInfAnc is vector of strings"), {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    gdsF <- openfn.gds(fileGDS)
    withr::defer(closefn.gds(gdsF), envir=parent.frame())

    error_message <- paste0("The \'fieldPopInfAnc\' parameter must be a ",
                                    "character string.")

    expect_error(computeAncestryFromSyntheticFile(gdsReference=gdsF, gdsProfile=gdsF,
        listFiles=fileGDS, currentProfile="sample01", spRef=c("EUR", "AFR"),
        studyIDSyn="Synthetic", np=1L, listCatPop=c("EAS", "EUR", "AFR"),
        fieldPopIn1KG="test", fieldPopInfAnc=c("SuperPop", "test"),
        kList=seq(2, 15, 1), pcaList=seq(2, 15, 1), algorithm="exact",
        eigenCount=32L,  missingRate=0.2), error_message)
})


test_that(paste0("computeAncestryFromSyntheticFile() must return error when kList has one negative numeric"), {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    gdsF <- openfn.gds(fileGDS)
    withr::defer(closefn.gds(gdsF), envir=parent.frame())

    error_message <- paste0("The \'kList\' parameter must be a vector of ",
                                "positive integers.")

    expect_error(computeAncestryFromSyntheticFile(gdsReference=gdsF, gdsProfile=gdsF,
        listFiles=fileGDS, currentProfile="sample01", spRef=c("EUR", "AFR"),
        studyIDSyn="Synthetic", np=1L, listCatPop=c("EAS", "EUR", "AFR"),
        fieldPopIn1KG="test", fieldPopInfAnc="SuperPop",
        kList=c(1, 2, -3, 4), pcaList=seq(2, 15, 1), algorithm="exact",
        eigenCount=32L,  missingRate=0.2), error_message)
})


test_that(paste0("computeAncestryFromSyntheticFile() must return error when pcaList has one negative numeric"), {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    gdsF <- openfn.gds(fileGDS)
    withr::defer(closefn.gds(gdsF), envir=parent.frame())

    error_message <- paste0("The \'pcaList\' parameter must be a vector of ",
                                "positive integers.")

    expect_error(computeAncestryFromSyntheticFile(gdsReference=gdsF, gdsProfile=gdsF,
        listFiles=fileGDS, currentProfile="sample01", spRef=c("EUR", "AFR"),
        studyIDSyn="Synthetic", np=1L, listCatPop=c("EAS", "EUR", "AFR"),
        fieldPopIn1KG="test", fieldPopInfAnc="SuperPop",
        kList=c(1, 2, 3, 4), pcaList=c(2, -15, 1), algorithm="exact",
        eigenCount=32L,  missingRate=0.2), error_message)
})


test_that(paste0("computeAncestryFromSyntheticFile() must return error when file does not exist"), {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    gdsF <- openfn.gds(fileGDS)
    withr::defer(closefn.gds(gdsF), envir=parent.frame())

    error_message <- "The file \'toto\' does not exist."

    expect_error(computeAncestryFromSyntheticFile(gdsReference=gdsF, gdsProfile=gdsF,
        listFiles=c("toto"), currentProfile="sample01", spRef=c("EUR", "AFR"),
        studyIDSyn="Synthetic", np=1L, listCatPop=c("EAS", "EUR", "AFR"),
        fieldPopIn1KG="test", fieldPopInfAnc="SuperPop",
        kList=c(1, 2, 3, 4), pcaList=c(2, 15, 1), algorithm="exact",
        eigenCount=32L,  missingRate=0.2), error_message)
})


test_that(paste0("computeAncestryFromSyntheticFile() must return error when verbose is numeric"), {

    fileGDS <- test_path("fixtures", "1KG_Test.gds")
    gdsF <- openfn.gds(fileGDS)
    withr::defer(closefn.gds(gdsF), envir=parent.frame())

    error_message <- "The \'verbose\' parameter must be a logical (TRUE or FALSE)."

    expect_error(computeAncestryFromSyntheticFile(gdsReference=gdsF, gdsProfile=gdsF,
        listFiles=fileGDS, currentProfile="sample01", spRef=c("EUR", "AFR"),
        studyIDSyn="Synthetic", np=1L, listCatPop=c("EAS", "EUR", "AFR"),
        fieldPopIn1KG="test", fieldPopInfAnc="SuperPop",
        kList=c(1, 2, 3, 4), pcaList=c(2, 15, 1), algorithm="exact",
        eigenCount=32L,  missingRate=0.2, verbose=33), error_message, fixed=TRUE)
})


test_that(paste0("computeAncestryFromSyntheticFile() must return expected results"), {

    fileGDS <- test_path("fixtures/sampleGDSforAncestryByFile/gdsRef")
    gdsRef <- openfn.gds(file.path(fileGDS, "ex1kg.gds"))
    withr::defer(closefn.gds(gdsRef), envir=parent.frame())

    fileGDS <- test_path("fixtures/sampleGDSforAncestryByFile")
    gdsF <- snpgdsOpen(file.path(fileGDS, "ex1.gds"))
    withr::defer(closefn.gds(gdsF), envir=parent.frame())

    ## List of the KNN result files from PCA run on synthetic data
    fileKNN <- test_path("fixtures/sampleGDSforAncestryByFile/filesKNN")
    listFilesName <- dir(file.path(fileKNN), ".rds")
    listFiles <- file.path(file.path(fileKNN) , listFilesName)

    # The name of the synthetic study
    studyID <- "MYDATA.Synthetic"

    ## The known ancestry for the 1KG reference profiles
    filePop <- test_path("fixtures/sampleGDSforPoolSyntheticAncestry")
    refKnownSuperPop <- readRDS(file.path(filePop, "knownSuperPop1KG.RDS"))

    set.seed(111)

    ## Run the ancestry inference on one profile called 'ex1'
    ## The values of K and D used for the inference are selected using the
    ## synthetic results
    res <- computeAncestryFromSyntheticFile(gdsReference=gdsRef, gdsProfile=gdsF,
            listFiles=listFiles, currentProfile="ex1", spRef=refKnownSuperPop,
            studyIDSyn=studyID, np=1L,
            listCatPop=c("EAS", "EUR", "AFR", "AMR", "SAS"),
            fieldPopIn1KG="superPop",
            fieldPopInfAnc="SuperPop",kList=NULL,
            pcaList=NULL, algorithm="exact",
            eigenCount=32L,  missingRate=NaN, verbose=FALSE)

    expect_true(is.list(res))
    expect_true(all(c("pcaSample", "paraSample", "KNNSample", "Ancestry") %in%
                                names(res)))

    expect_true(is.vector(res$pcaSample))
    expect_true(all(c("sample.id", "eigenvector.ref", "eigenvector") %in%
                        names(res$pcaSample)))
    expect_equal(res$pcaSample$sample.id, "ex1")
    expect_equal(res$pcaSample$eigenvector[, c(3, 22, 26, 30)],
        c(-0.186164267684954, -0.214690314074977, 0.409505274265608,
                        -0.323542785477152))
    expect_equal(res$pcaSample$eigenvector.ref[4, c(3, 22, 26, 30)],
        c(-0.018272328822580, -0.034625002667539, -0.021132042058256,
                        -0.018541974308472))

    expect_true(is.vector(res$KNNSample))
    expect_true(all(c("sample.id", "matKNN") %in%
                        names(res$KNNSample)))
    expect_equal(res$KNNSample$sample.id, "ex1")
    expect_true(is.data.frame(res$KNNSample$matKNN))
    expect_true(all(c("sample.id", "D", "K", "SuperPop") %in%
                        names(res$KNNSample$matKNN)))
    expect_equal(res$KNNSample$matKNN[c(3, 55, 132, 188), 1],
                    c("ex1", "ex1", "ex1", "ex1"))
    expect_equal(res$KNNSample$matKNN[c(13, 25, 135, 168), 2], c(2, 3, 11, 13))
    expect_equal(res$KNNSample$matKNN[c(6, 21, 132, 161), 3], c(7, 8, 7, 8))
    expect_equal(res$KNNSample$matKNN[c(16, 27, 112, 171), 4],
                        c("SAS", "EAS", "SAS", "SAS"))

    expect_true(is.vector(res$paraSample))
    expect_true(all(c("dfPCA", "dfPop", "dfAUROC", "D", "K", "listD") %in%
                        names(res$paraSample)))
    expect_equal(res$paraSample$D, 7)
    expect_equal(res$paraSample$K, 8)
    expect_equal(res$paraSample$listD, 7)

    expect_true(is.data.frame(res$Ancestry))
    expect_equal(res$Ancestry$sample.id, "ex1")
    expect_equal(res$Ancestry$SuperPop, "EAS")
    expect_equal(res$Ancestry$D, 7)
    expect_equal(res$Ancestry$K, 8)
})


#############################################################################
### Tests runExomeAncestry() results
#############################################################################


context("runExomeAncestry() results")


test_that("runExomeAncestry() must return error when pathOut is numeric", {

    pathOut <- test_path("fixtures")
    fileGDS <- file.path(pathOut, "ex1_good_small_1KG_GDS.gds")
    gdsFileAnnot <- file.path(pathOut, "ex1_good_small_1KG_Annot_GDS.gds")

    chromosome <- c(956422L, 193529L, 295559L, 214555L)

    studyDF <- data.frame(study.id="MYDATA", study.desc="Description",
                    study.platform="PLATFORM", stringsAsFactors=FALSE)

    ## Pedigree Study data frame
    ped <- data.frame(Name.ID=c("Sample_01", "Sample_02"),
                Case.ID=c("TCGA-H01", "TCGA-H02"),
                Sample.Type=c("DNA", "DNA"),
                Diagnosis=c("Cancer", "Cancer"), Source=c("TCGA", "TCGA"))

    ## Profiles used for synthetic data set
    syntheticRefDF <- data.frame(sample.id=c("HG00150", "HG00138", "HG00330",
        "HG00275"), pop.group=c("GBR", "GBR","FIN", "FIN"),
        superPop=c("EUR", "EUR", "EUR", "EUR"), stringsAsFactors=FALSE)

    error_message <- paste0("The \'pathOut\' must be a character string ",
            "representing the path where the output files will be generated.")

    expect_error(runExomeAncestry(pedStudy=ped, studyDF=studyDF,
        pathProfileGDS=pathOut,
        pathGeno=pathOut, pathOut=33, fileReferenceGDS=fileGDS,
        fileReferenceAnnotGDS=gdsFileAnnot, chrInfo=chromosome,
        syntheticRefDF=syntheticRefDF,
        genoSource="snp-pileup"), error_message)
})


test_that("runExomeAncestry() must return error when fileReferenceGDS is numeric", {

    pathOut <- test_path("fixtures")
    fileGDS <- file.path(pathOut, "ex1_good_small_1KG_GDS.gds")
    gdsFileAnnot <- file.path(pathOut, "ex1_good_small_1KG_Annot_GDS.gds")

    chromosome <- c(956422L, 193529L, 295559L, 214555L)

    studyDF <- data.frame(study.id="MYDATA", study.desc="Description",
                        study.platform="PLATFORM", stringsAsFactors=FALSE)

    ## Pedigree Study data frame
    ped <- data.frame(Name.ID=c("Sample_01", "Sample_02"),
                Case.ID=c("TCGA-H01", "TCGA-H02"),
                Sample.Type=c("DNA", "DNA"),
                Diagnosis=c("Cancer", "Cancer"), Source=c("TCGA", "TCGA"))

    ## Profiles used for synthetic data set
    syntheticRefDF <- data.frame(sample.id=c("HG00150", "HG00138", "HG00330",
            "HG00275"), pop.group=c("GBR", "GBR","FIN", "FIN"),
            superPop=c("EUR", "EUR", "EUR", "EUR"), stringsAsFactors=FALSE)

    error_message <- paste0("The \'fileReferenceGDS\' must be a character ",
        "string representing the Reference GDS file. The file must exist.")

    expect_error(runExomeAncestry(pedStudy=ped, studyDF=studyDF,
        pathProfileGDS=pathOut, pathGeno=pathOut, pathOut=pathOut,
        fileReferenceGDS=33, fileReferenceAnnotGDS=gdsFileAnnot,
        chrInfo=chromosome, syntheticRefDF,
        genoSource="snp-pileup"), error_message)
})


test_that("runExomeAncestry() must return error when fileReferenceAnnotGDS is numeric", {

    pathOut <- test_path("fixtures")
    fileGDS <- file.path(pathOut, "ex1_good_small_1KG_GDS.gds")
    gdsFileAnnot <- file.path(pathOut, "ex1_good_small_1KG_Annot_GDS.gds")

    chromosome <- c(956422L, 193529L, 295559L, 214555L)

    studyDF <- data.frame(study.id="MYDATA", study.desc="Description",
                          study.platform="PLATFORM", stringsAsFactors=FALSE)

    ## Pedigree Study data frame
    ped <- data.frame(Name.ID=c("Sample_01", "Sample_02"),
                      Case.ID=c("TCGA-H01", "TCGA-H02"),
                      Sample.Type=c("DNA", "DNA"),
                      Diagnosis=c("Cancer", "Cancer"), Source=c("TCGA", "TCGA"))

    ## Profiles used for synthetic data set
    syntheticRefDF <- data.frame(sample.id=c("HG00150", "HG00138", "HG00330",
            "HG00275"), pop.group=c("GBR", "GBR","FIN", "FIN"),
            superPop=c("EUR", "EUR", "EUR", "EUR"), stringsAsFactors=FALSE)

    error_message <- paste0("The \'fileReferenceAnnotGDS\' must be a character",
            " string representing the Reference Annotation GDS file. ",
            "The file must exist.")

    expect_error(runExomeAncestry(pedStudy=ped, studyDF=studyDF,
            pathProfileGDS=pathOut, pathGeno=pathOut, pathOut=pathOut,
            fileReferenceGDS=fileGDS, fileReferenceAnnotGDS=32,
            chrInfo=chromosome, syntheticRefDF=syntheticRefDF,
            genoSource="snp-pileup"), error_message)
})


test_that("runExomeAncestry() must return error when chrInfo is vector of characters", {

    pathOut <- test_path("fixtures")
    fileGDS <- file.path(pathOut, "ex1_good_small_1KG_GDS.gds")
    gdsFileAnnot <- file.path(pathOut, "ex1_good_small_1KG_Annot_GDS.gds")

    studyDF <- data.frame(study.id="MYDATA", study.desc="Description",
                          study.platform="PLATFORM", stringsAsFactors=FALSE)

    ## Pedigree Study data frame
    ped <- data.frame(Name.ID=c("Sample_01", "Sample_02"),
                      Case.ID=c("TCGA-H01", "TCGA-H02"),
                      Sample.Type=c("DNA", "DNA"),
                      Diagnosis=c("Cancer", "Cancer"),
                      Source=c("TCGA", "TCGA"), stringsAsFactors=FALSE)

    ## Profiles used for synthetic data set
    syntheticRefDF <- data.frame(sample.id=c("HG00150", "HG00138", "HG00330",
            "HG00275"), pop.group=c("GBR", "GBR","FIN", "FIN"),
            superPop=c("EUR", "EUR", "EUR", "EUR"), stringsAsFactors=FALSE)

    error_message <- paste0("The 'chrInfo' parameter must be a ",
            "vector of positive integers.")

    expect_error(runExomeAncestry(pedStudy=ped, studyDF=studyDF,
        pathProfileGDS=pathOut, pathGeno=pathOut, pathOut=pathOut,
        fileReferenceGDS=fileGDS, fileReferenceAnnotGDS=gdsFileAnnot,
        chrInfo=c("ALLO", "TEST"), syntheticRefDF=syntheticRefDF,
        genoSource="snp-pileup"), error_message)
})


test_that("runExomeAncestry() must return error when syntheticRefDF missing column", {

    pathOut <- test_path("fixtures")
    fileGDS <- file.path(pathOut, "ex1_good_small_1KG_GDS.gds")
    gdsFileAnnot <- file.path(pathOut, "ex1_good_small_1KG_Annot_GDS.gds")

    studyDF <- data.frame(study.id="MYDATA", study.desc="Description",
                          study.platform="PLATFORM", stringsAsFactors=FALSE)

    ## Pedigree Study data frame
    ped <- data.frame(Name.ID=c("Sample_01", "Sample_02"),
                      Case.ID=c("TCGA-H01", "TCGA-H02"),
                      Sample.Type=c("DNA", "DNA"),
                      Diagnosis=c("Cancer", "Cancer"),
                      Source=c("TCGA", "TCGA"), stringsAsFactors=FALSE)

    ## Profiles used for synthetic data set
    syntheticRefDF <- data.frame(sample.id=c("HG00150", "HG00138", "HG00330",
            "HG00275"), pop.group=c("GBR", "GBR","FIN", "FIN"),
            stringsAsFactors=FALSE)

    error_message <- paste0("The reference profile data frame ",
        "\'syntheticRefDF\' is incomplete. One or more mandatory columns are ",
        "missing. The mandatory columns are: \'sample.id\', ",
        "\'pop.group\', \'superPop\'.")

    expect_error(runExomeAncestry(pedStudy=ped, studyDF=studyDF,
        pathProfileGDS=pathOut, pathGeno=pathOut, pathOut=pathOut,
        fileReferenceGDS=fileGDS, fileReferenceAnnotGDS=gdsFileAnnot,
        chrInfo=c(100L, 200L), syntheticRefDF=syntheticRefDF,
        genoSource="snp-pileup"), error_message)
})


test_that("runExomeAncestry() must return error when pathGeno does not exist", {

    pathOut <- test_path("fixtures")
    fileGDS <- file.path(pathOut, "ex1_good_small_1KG_GDS.gds")
    gdsFileAnnot <- file.path(pathOut, "ex1_good_small_1KG_Annot_GDS.gds")

    studyDF <- data.frame(study.id="MYDATA", study.desc="Description",
                    study.platform="PLATFORM", stringsAsFactors=FALSE)

    ## Pedigree Study data frame
    ped <- data.frame(Name.ID=c("Sample_01", "Sample_02"),
                        Case.ID=c("TCGA-H01", "TCGA-H02"),
                        Sample.Type=c("DNA", "DNA"),
                        Diagnosis=c("Cancer", "Cancer"),
                        Source=c("TCGA", "TCGA"), stringsAsFactors=FALSE)

    ## Profiles used for synthetic data set
    syntheticRefDF <- data.frame(sample.id=c("HG00150", "HG00138", "HG00330",
                "HG00275"), pop.group=c("GBR", "GBR","FIN", "FIN"),
                superPop=c("EUR", "EUR", "EUR", "EUR"), stringsAsFactors=FALSE)

    error_message <- "The \'pathGeno\' must be an existing directory."

    expect_error(runExomeAncestry(pedStudy=ped, studyDF=studyDF,
        pathProfileGDS=pathOut, pathGeno="DONOTEXISTDIR", pathOut=pathOut,
        fileReferenceGDS=fileGDS, fileReferenceAnnotGDS=gdsFileAnnot,
        chrInfo=c(100L, 200L), syntheticRefDF=syntheticRefDF,
        genoSource="snp-pileup"), error_message)
})




test_that("runExomeAncestry() must return error when verbose is a numeric", {

    pathOut <- test_path("fixtures")
    fileGDS <- file.path(pathOut, "ex1_good_small_1KG_GDS.gds")
    gdsFileAnnot <- file.path(pathOut, "ex1_good_small_1KG_Annot_GDS.gds")

    studyDF <- data.frame(study.id="MYDATA", study.desc="Description",
                    study.platform="PLATFORM", stringsAsFactors=FALSE)

    ## Pedigree Study data frame
    ped <- data.frame(Name.ID=c("Sample_01", "Sample_02"),
                         Case.ID=c("TCGA-H01", "TCGA-H02"),
                        Sample.Type=c("DNA", "DNA"),
                        Diagnosis=c("Cancer", "Cancer"),
                        Source=c("TCGA", "TCGA"), stringsAsFactors=FALSE)

    ## Profiles used for synthetic data set
    syntheticRefDF <- data.frame(sample.id=c("HG00150", "HG00138", "HG00330",
        "HG00275"), pop.group=c("GBR", "GBR","FIN", "FIN"),
        superPop=c("EUR", "EUR", "EUR", "EUR"), stringsAsFactors=FALSE)

    error_message <- "The 'verbose' parameter must be a logical (TRUE or FALSE)."

    expect_error(runExomeAncestry(pedStudy=ped, studyDF=studyDF,
        pathProfileGDS=pathOut, pathGeno=pathOut, pathOut=pathOut,
        fileReferenceGDS=fileGDS, fileReferenceAnnotGDS=gdsFileAnnot,
        chrInfo=c(100L, 200L), syntheticRefDF=syntheticRefDF,
        genoSource="snp-pileup", verbose=33), error_message, fixed=TRUE)
})


#############################################################################
### Tests computePCAMultiSynthetic() results
#############################################################################


context("computePCAMultiSynthetic() results")


test_that("computePCAMultiSynthetic() must return error when gdsProfile is filepath", {

    pathFile <- test_path("fixtures/sampleGDSforPoolSyntheticAncestry")
    fileGDS <- file.path(pathFile, "ex1.gds")

    pca <- readRDS(file.path(pathFile, "pca1KG.RDS"))

    samplesRM <- c("HG00246", "HG00325", "HG00611", "HG01173", "HG02165",
        "HG01112", "HG01615", "HG01968", "HG02658", "HG01850", "HG02013",
        "HG02465", "HG02974", "HG03814", "HG03445", "HG03689", "HG03789",
        "NA12751", "NA19107", "NA18548", "NA19075", "NA19475", "NA19712",
        "NA19731", "NA20528", "NA20908")
    names(samplesRM) <- c("GBR", "FIN", "CHS","PUR", "CDX", "CLM", "IBS",
        "PEL", "PJL", "KHV", "ACB", "GWD", "ESN", "BEB", "MSL", "STU", "ITU",
        "CEU", "YRI", "CHB", "JPT", "LWK", "ASW", "MXL", "TSI", "GIH")

    error_message <- "The \'gdsProfile\' must be an object of class \'gds.class\'"

    expect_error(computePCAMultiSynthetic(gdsProfile=fileGDS, listPCA=pca,
        sampleRef=samplesRM, studyIDSyn="MyData", verbose=FALSE), error_message)
})


test_that("computePCAMultiSynthetic() must return error when listPCA is empty list", {

    pathFile <- test_path("fixtures/sampleGDSforPoolSyntheticAncestry")
    fileGDS <- file.path(pathFile, "ex1.gds")

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    pca <- readRDS(file.path(pathFile, "pca1KG.RDS"))

    samplesRM <- c("HG00246", "HG00325", "HG00611", "HG01173", "HG02165",
        "HG01112", "HG01615", "HG01968", "HG02658", "HG01850", "HG02013",
        "HG02465", "HG02974", "HG03814", "HG03445", "HG03689", "HG03789",
        "NA12751", "NA19107", "NA18548", "NA19075", "NA19475", "NA19712",
        "NA19731", "NA20528", "NA20908")
    names(samplesRM) <- c("GBR", "FIN", "CHS","PUR", "CDX", "CLM", "IBS",
        "PEL", "PJL", "KHV", "ACB", "GWD", "ESN", "BEB", "MSL", "STU", "ITU",
        "CEU", "YRI", "CHB", "JPT", "LWK", "ASW", "MXL", "TSI", "GIH")

    error_message <- paste0("The \'listPCA\' parameter must be a list with ",
                                    "the entries \'pca.unrel\'.")

    expect_error(computePCAMultiSynthetic(gdsProfile=gdsF, listPCA=list(),
        sampleRef=samplesRM, studyIDSyn="MyData", verbose=FALSE), error_message)
})


test_that("computePCAMultiSynthetic() must return error when samplesRM is numeric", {

    pathFile <- test_path("fixtures/sampleGDSforPoolSyntheticAncestry")
    fileGDS <- file.path(pathFile, "ex1.gds")

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    pca <- readRDS(file.path(pathFile, "pca1KG.RDS"))

    samplesRM <- 33

    error_message <- paste0("The \'sampleRef\' parameter must be a vector of ",
                                "character strings.")

    expect_error(computePCAMultiSynthetic(gdsProfile=gdsF, listPCA=pca,
        sampleRef=samplesRM, studyIDSyn="MyData", verbose=FALSE), error_message)
})


test_that("computePCAMultiSynthetic() must return error when studyIDSyn is vector", {

    pathFile <- test_path("fixtures/sampleGDSforPoolSyntheticAncestry")
    fileGDS <- file.path(pathFile, "ex1.gds")

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    pca <- readRDS(file.path(pathFile, "pca1KG.RDS"))

    samplesRM <- c("HG00246", "HG00325", "HG00611", "HG01173", "HG02165",
        "HG01112", "HG01615", "HG01968", "HG02658", "HG01850", "HG02013",
        "HG02465", "HG02974", "HG03814", "HG03445", "HG03689", "HG03789",
        "NA12751", "NA19107", "NA18548", "NA19075", "NA19475", "NA19712",
        "NA19731", "NA20528", "NA20908")
    names(samplesRM) <- c("GBR", "FIN", "CHS","PUR", "CDX", "CLM", "IBS",
        "PEL", "PJL", "KHV", "ACB", "GWD", "ESN", "BEB", "MSL", "STU", "ITU",
        "CEU", "YRI", "CHB", "JPT", "LWK", "ASW", "MXL", "TSI", "GIH")

    error_message <- "The \'studyIDSyn\' parameter must be a character string."

    expect_error(computePCAMultiSynthetic(gdsProfile=gdsF, listPCA=pca,
        sampleRef=samplesRM, studyIDSyn=c("MyData", "TEST"), verbose=FALSE),
        error_message)
})


#############################################################################
### Tests computeKNNRefSynthetic() results
#############################################################################


context("computeKNNRefSynthetic() results")


test_that("computeKNNRefSynthetic() must return error when gdsProfile is filepath", {

    pathFile <- test_path("fixtures/sampleGDSforPoolSyntheticAncestry")
    fileGDS <- file.path(pathFile, "ex1.gds")

    pca <- readRDS(file.path(pathFile, "pcaSynthetic.RDS"))

    ## The known ancestry for the 1KG reference profiles
    refKnownSuperPop <- readRDS(file.path(pathFile, "knownSuperPop1KG.RDS"))

    error_message <- "The \'gdsProfile\' must be an object of class \'gds.class\'"

    expect_error(computeKNNRefSynthetic(gdsProfile=fileGDS,listEigenvector=pca,
        listCatPop=c("EAS", "EUR", "AFR", "AMR", "SAS"), studyIDSyn="MyStudy",
        spRef=refKnownSuperPop, fieldPopInfAnc="Superpop", kList=c(10, 11, 12),
        pcaList=c(13, 14, 15)), error_message)
})


test_that("computeKNNRefSynthetic() must return error when listCatPop is numeric", {

    pathFile <- test_path("fixtures/sampleGDSforPoolSyntheticAncestry")
    fileGDS <- file.path(pathFile, "ex1.gds")

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    pca <- readRDS(file.path(pathFile, "pcaSynthetic.RDS"))

    ## The known ancestry for the 1KG reference profiles
    refKnownSuperPop <- readRDS(file.path(pathFile, "knownSuperPop1KG.RDS"))

    error_message <- paste0("The \'listCatPop\' parameter must be a vector of ",
                                "character strings.")

    expect_error(computeKNNRefSynthetic(gdsProfile=gdsF,listEigenvector=pca,
        listCatPop=44, studyIDSyn="MyStudy",
        spRef=refKnownSuperPop, fieldPopInfAnc="Superpop", kList=c(10, 11, 12),
        pcaList=c(13, 14, 15)), error_message)
})


test_that("computeKNNRefSynthetic() must return error when studyIDSyn is numeric", {

    pathFile <- test_path("fixtures/sampleGDSforPoolSyntheticAncestry")
    fileGDS <- file.path(pathFile, "ex1.gds")

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    pca <- readRDS(file.path(pathFile, "pcaSynthetic.RDS"))

    ## The known ancestry for the 1KG reference profiles
    refKnownSuperPop <- readRDS(file.path(pathFile, "knownSuperPop1KG.RDS"))

    error_message <- "The \'studyIDSyn\' parameter must be a character string."

    expect_error(computeKNNRefSynthetic(gdsProfile=gdsF,listEigenvector=pca,
        listCatPop=c("EAS", "EUR", "AFR", "AMR", "SAS"),  studyIDSyn=3,
        spRef=refKnownSuperPop, fieldPopInfAnc="Superpop", kList=c(10, 11, 12),
        pcaList=c(13, 14, 15)), error_message)
})


test_that("computeKNNRefSynthetic() must return error when listEigenvector is empty list", {

    pathFile <- test_path("fixtures/sampleGDSforPoolSyntheticAncestry")
    fileGDS <- file.path(pathFile, "ex1.gds")

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    ## The known ancestry for the 1KG reference profiles
    refKnownSuperPop <- readRDS(file.path(pathFile, "knownSuperPop1KG.RDS"))

    error_message <- paste0("The \'listEigenvector\' parameter must be a list",
        " with 3 entries: \'sample.id\', \'eigenvector.ref\' and \'eigenvector\'.")

    expect_error(computeKNNRefSynthetic(gdsProfile=gdsF,listEigenvector=list(),
        listCatPop=c("EAS", "EUR", "AFR", "AMR", "SAS"),  studyIDSyn="MyData",
        spRef=refKnownSuperPop, fieldPopInfAnc="Superpop", kList=c(10, 11, 12),
        pcaList=c(13, 14, 15)), error_message)
})


test_that("computeKNNRefSynthetic() must return error when spRef is numeric", {

    pathFile <- test_path("fixtures/sampleGDSforPoolSyntheticAncestry")
    fileGDS <- file.path(pathFile, "ex1.gds")

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    pca <- readRDS(file.path(pathFile, "pcaSynthetic.RDS"))

    error_message <- paste0("The \'spRef\' parameter must be a vector of ",
                                    "character strings.")

    expect_error(computeKNNRefSynthetic(gdsProfile=gdsF,listEigenvector=pca,
        listCatPop=c("EAS", "EUR", "AFR", "AMR", "SAS"),  studyIDSyn="MyData",
        spRef=44, fieldPopInfAnc="Superpop", kList=c(10, 11, 12),
        pcaList=c(13, 14, 15)), error_message)
})


test_that("computeKNNRefSynthetic() must return error when fieldPopInfAnc is numeric", {

    pathFile <- test_path("fixtures/sampleGDSforPoolSyntheticAncestry")
    fileGDS <- file.path(pathFile, "ex1.gds")

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    pca <- readRDS(file.path(pathFile, "pcaSynthetic.RDS"))

    ## The known ancestry for the 1KG reference profiles
    refKnownSuperPop <- readRDS(file.path(pathFile, "knownSuperPop1KG.RDS"))

    error_message <- paste0("The \'fieldPopInfAnc\' parameter must be a ",
                                "character string.")

    expect_error(computeKNNRefSynthetic(gdsProfile=gdsF,listEigenvector=pca,
        listCatPop=c("EAS", "EUR", "AFR", "AMR", "SAS"),  studyIDSyn="MyData",
        spRef=refKnownSuperPop, fieldPopInfAnc=22, kList=c(10, 11, 12),
        pcaList=c(13, 14, 15)), error_message)
})


test_that("computeKNNRefSynthetic() must return error when kList is character", {

    pathFile <- test_path("fixtures/sampleGDSforPoolSyntheticAncestry")
    fileGDS <- file.path(pathFile, "ex1.gds")

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    pca <- readRDS(file.path(pathFile, "pcaSynthetic.RDS"))

    ## The known ancestry for the 1KG reference profiles
    refKnownSuperPop <- readRDS(file.path(pathFile, "knownSuperPop1KG.RDS"))

    error_message <- paste0("The \'kList\' parameter must be a vector of ",
        "positive numerics representing the K-neighbors values tested.")

    expect_error(computeKNNRefSynthetic(gdsProfile=gdsF,listEigenvector=pca,
        listCatPop=c("EAS", "EUR", "AFR", "AMR", "SAS"),  studyIDSyn="MyData",
        spRef=refKnownSuperPop, fieldPopInfAnc="Superpop", kList="CANADA",
        pcaList=c(13, 14, 15)), error_message)
})


test_that("computeKNNRefSynthetic() must return error when pcaList is character", {

    pathFile <- test_path("fixtures/sampleGDSforPoolSyntheticAncestry")
    fileGDS <- file.path(pathFile, "ex1.gds")

    gdsF <- openfn.gds(fileGDS)
    withr::defer((gdsfmt::closefn.gds(gdsF)), envir=parent.frame())

    pca <- readRDS(file.path(pathFile, "pcaSynthetic.RDS"))

    ## The known ancestry for the 1KG reference profiles
    refKnownSuperPop <- readRDS(file.path(pathFile, "knownSuperPop1KG.RDS"))

    error_message <- paste0("The \'pcaList\' parameter must be a vector of ",
        "positive numerics representing the PCA dimensions that are tested.")

    expect_error(computeKNNRefSynthetic(gdsProfile=gdsF,listEigenvector=pca,
        listCatPop=c("EAS", "EUR", "AFR", "AMR", "SAS"),  studyIDSyn="MyData",
        spRef=refKnownSuperPop, fieldPopInfAnc="Superpop", pcaList="CANADA",
                                kList=c(13, 14, 15)), error_message)
})


#############################################################################
### Tests computeKNNRefSample() results
#############################################################################


context("computeKNNRefSample() results")


test_that("computeKNNRefSample() must return error when listEigenvector is empty list", {

    pathFile <- test_path("fixtures/sampleGDSforPoolSyntheticAncestry")

    ## The known ancestry for the 1KG reference profiles
    refKnownSuperPop <- readRDS(file.path(pathFile, "knownSuperPop1KG.RDS"))

    error_message <- paste0("The \'listEigenvector\' parameter must be a list with 3 ",
        "entries: \'sample.id\', \'eigenvector.ref\' and \'eigenvector\'.")

    expect_error(computeKNNRefSample(listEigenvector=list(),
        listCatPop=c("EAS", "EUR", "AFR", "AMR", "SAS"),
        spRef=refKnownSuperPop, fieldPopInfAnc="Superpop", kList=c(10, 11, 12),
        pcaList=c(13, 14, 15)), error_message)
})


test_that("computeKNNRefSample() must return error when listEigenvector has multiple samples", {

    pathFile <- test_path("fixtures/sampleGDSforPoolSyntheticAncestry")

    pca <- readRDS(file.path(pathFile, "pcaSynthetic.RDS"))

    ## The known ancestry for the 1KG reference profiles
    refKnownSuperPop <- readRDS(file.path(pathFile, "knownSuperPop1KG.RDS"))

    error_message <- paste0("Only one profile can be present in the ",
        "\'sample.id\' entry from the \'listEigenvector\' parameter.\n")

    expect_error(computeKNNRefSample(listEigenvector=pca,
        listCatPop=c("EAS", "EUR", "AFR", "AMR", "SAS"),
        spRef=refKnownSuperPop, fieldPopInfAnc="Superpop", kList=c(10, 11, 12),
        pcaList=c(13, 14, 15)), error_message)
})


test_that("computeKNNRefSample() must return error when listCatPop is numeric", {

    pathFile <- test_path("fixtures/sampleGDSforPoolSyntheticAncestry")

    pca <- readRDS(file.path(pathFile, "pcaSynthetic.RDS"))
    pca$sample.id <- pca$sample.id[1]
    pca$eigenvector <- pca$eigenvector[1, , drop=FALSE]

    ## The known ancestry for the 1KG reference profiles
    refKnownSuperPop <- readRDS(file.path(pathFile, "knownSuperPop1KG.RDS"))

    error_message <- paste0("The \'listCatPop\' parameter must be a vector of ",
                                "character strings.")

    expect_error(computeKNNRefSample(listEigenvector=pca,
        listCatPop=33, spRef=refKnownSuperPop, fieldPopInfAnc="Superpop",
        kList=c(10, 11, 12), pcaList=c(13, 14, 15)), error_message)
})


test_that("computeKNNRefSample() must return error when spRef is numeric", {

    pathFile <- test_path("fixtures/sampleGDSforPoolSyntheticAncestry")

    pca <- readRDS(file.path(pathFile, "pcaSynthetic.RDS"))
    pca$sample.id <- pca$sample.id[1]
    pca$eigenvector <- pca$eigenvector[1, , drop=FALSE]

    error_message <- paste0("The \'spRef\' parameter must be a vector of ",
                                "character strings.")

    expect_error(computeKNNRefSample(listEigenvector=pca,
        listCatPop=c("EAS", "EUR", "AFR", "AMR", "SAS"), spRef=44,
        fieldPopInfAnc="Superpop", kList=c(10, 11, 12),
        pcaList=c(13, 14, 15)), error_message)
})


test_that("computeKNNRefSample() must return error when fieldPopInfAnc is vector of strings", {

    pathFile <- test_path("fixtures/sampleGDSforPoolSyntheticAncestry")

    pca <- readRDS(file.path(pathFile, "pcaSynthetic.RDS"))
    pca$sample.id <- pca$sample.id[1]
    pca$eigenvector <- pca$eigenvector[1, , drop=FALSE]

    ## The known ancestry for the 1KG reference profiles
    refKnownSuperPop <- readRDS(file.path(pathFile, "knownSuperPop1KG.RDS"))

    error_message <- paste0("The \'fieldPopInfAnc\' parameter must be a ",
                                    "character string.")

    expect_error(computeKNNRefSample(listEigenvector=pca,
        listCatPop=c("EAS", "EUR", "AFR", "AMR", "SAS"), spRef=refKnownSuperPop,
        fieldPopInfAnc=c("Superpop", "B"), kList=c(10, 11, 12),
        pcaList=c(13, 14, 15)), error_message)
})


test_that("computeKNNRefSample() must return error when kList is vector of strings", {

    pathFile <- test_path("fixtures/sampleGDSforPoolSyntheticAncestry")

    pca <- readRDS(file.path(pathFile, "pcaSynthetic.RDS"))
    pca$sample.id <- pca$sample.id[1]
    pca$eigenvector <- pca$eigenvector[1, , drop=FALSE]

    ## The known ancestry for the 1KG reference profiles
    refKnownSuperPop <- readRDS(file.path(pathFile, "knownSuperPop1KG.RDS"))

    error_message <- paste0("The \'kList\' parameter must be a vector of ",
        "positive numerics representing the K-neighbors values tested.")

    expect_error(computeKNNRefSample(listEigenvector=pca,
        listCatPop=c("EAS", "EUR", "AFR", "AMR", "SAS"), spRef=refKnownSuperPop,
        fieldPopInfAnc="Superpop", kList=c("A", "B"),
        pcaList=c(13, 14, 15)), error_message)
})


test_that("computeKNNRefSample() must return error when pcaList is vector of strings", {

    pathFile <- test_path("fixtures/sampleGDSforPoolSyntheticAncestry")

    pca <- readRDS(file.path(pathFile, "pcaSynthetic.RDS"))
    pca$sample.id <- pca$sample.id[1]
    pca$eigenvector <- pca$eigenvector[1, , drop=FALSE]

    ## The known ancestry for the 1KG reference profiles
    refKnownSuperPop <- readRDS(file.path(pathFile, "knownSuperPop1KG.RDS"))

    error_message <- paste0("The \'pcaList\' parameter must be a vector of ",
        "positive numerics representing the PCA dimensions that are tested.")

    expect_error(computeKNNRefSample(listEigenvector=pca,
        listCatPop=c("EAS", "EUR", "AFR", "AMR", "SAS"), spRef=refKnownSuperPop,
        fieldPopInfAnc="Superpop", kList=c(13, 14, 15),
        pcaList=c("A", "B")), error_message)
})


test_that("computeKNNRefSample() must return expected results", {

    pathFile <- test_path("fixtures/sampleGDSforPoolSyntheticAncestry")

    pca <- readRDS(file.path(pathFile, "pcaSynthetic.RDS"))
    pca$sample.id <- pca$sample.id[1]
    pca$eigenvector <- pca$eigenvector[1, , drop=FALSE]

    ## The known ancestry for the 1KG reference profiles
    refKnownSuperPop <- readRDS(file.path(pathFile, "knownSuperPop1KG.RDS"))

    set.seed(121)
    result <- computeKNNRefSample(listEigenvector=pca,
        listCatPop=c("EAS", "EUR", "AFR", "AMR", "SAS"), spRef=refKnownSuperPop,
        fieldPopInfAnc="Superpop", kList=NULL,
        pcaList=NULL)

    expect_true(is.list(result))
    expect_equal(names(result), c("sample.id", "matKNN"))
    expect_equal(result$sample.id, "1.ex1.HG00246.1")
    expect_true(is.data.frame(result$matKNN))
    expect_equal(colnames(result$matKNN), c("sample.id", "D", "K", "Superpop"))
    expect_equal(unique(result$matKNN$D), seq(2, 15, by=1))
    expect_equal(unique(result$matKNN$K), seq(2, 15, by=1))
    expect_equal(unique(result$matKNN$Superpop), c("SAS", "EAS", "EUR", "AMR"))
    expect_equal(unique(result$matKNN$sample.id), "1.ex1.HG00246.1")
})
