#-----------------------------------------------
#                 LICENSE
#-----------------------------------------------
# Copyright 2019 Novartis Institutes for BioMedical Research Inc.
# Licensed under the GNU General Public License, Version 3 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# https://www.r-project.org/Licenses/GPL-3
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


#'@title Pearson correlation of cell type proportions across cell types 
#'and methods
#'
#'@description \code{correlate} computes Pearson correlations between
#'estimated cell type proportions generated by different methods.
#'
#'@details \code{correlation_analysis} is particularly useful to assess the 
#'performance of the different methods when no ground truth is available. 
#'If several methods agree on similar relative abundances of cell types across 
#'samples, the results are more likely to reflect true differences in cell 
#'type composition.
#'
#'@param deconvoluted A list: output object from \code{deconvolute}
#'
#'@param scale Boolean: indicate whether the coefficients should be 
#'transformed to standard scores (default: scale = TRUE).
#'
#'@return Returns a list encompassing two data frames: \itemize{
#'\item{the pearson correlation of coefficients with all other coefficients}
#'\item{summay: summary statistics of all-to-all correlation of coefficients 
#'by cell type}
#'\item{rank: ranking of deconvolution alghoritms by highest all-to-all 
#'correlation of coefficients}
#'\item{rank: ranking of deconvolution alghoritms by highest average 
#'regression all-to-all correlation of coefficients}
#'\item{combinations: combination of methods and signatures tested}
#'}
#'
#'@importFrom stats na.omit
#'@importFrom stats sd
#'@importFrom magrittr set_colnames
#'@importFrom tibble as_tibble
#'@importFrom dplyr select mutate filter group_by summarize ungroup arrange desc
#'@importFrom tidyr gather
#'@importFrom rlang .data
#'
#'@author Vincent Kuettel, Sabina Pfister
#'
#'@examples
#'# load data
#'load_ABIS()
#'
#'# deconvolute
#'decon <- deconvolute(m = bulkRNAseq_ABIS, 
#'sigMatrix = sigMatrix_ABIS_S0)
#'
#'# correlate
#'correl <- correlate(deconvoluted = decon)
#'
#'@export
correlate <- function(deconvoluted, scale = TRUE){

    # get proportions
    dat <- deconvoluted$proportions 

    # specify column names and combine lists into single data frame
    dat_com <- as.data.frame(lapply(seq_along(dat), function(x){
        set_colnames(x = dat[[x]], 
            value = paste(colnames(dat[[x]]), 
            names(dat)[[x]], sep = '_'))
    }))

    # scale
    if (scale)
        dat_com <- scale(dat_com) 

    # compute correlation
    cor <- as.data.frame(apply(cor(dat_com), 1, as.numeric))
    rownames(cor) <- colnames(cor)

    # results 
    res <- list(correlation = cor)

    # remove NAs
    cor_big <- as.matrix(res$correlation)
    cor_big <- cor_big[
        !apply(cor_big, 1, function(x) sum(na.omit(x))==1),
        !apply(cor_big, 2, function(x) sum(na.omit(x))==1)]

    # all-to-all correlations
    all <- as_tibble(cor_big) %>%
        mutate(rowA = rownames(cor_big)) %>%
        gather("rowB","correlation", -.data$rowA) %>%
        mutate(cellTypeA = sub("_.*","",.data$rowA)) %>%
        mutate(cellTypeB = sub("_.*","",.data$rowB)) %>%
        mutate(modelA = sub(".*?_","",.data$rowA)) %>%
        mutate(modelB = sub(".*?_","",.data$rowB)) %>%
        mutate(methodA = sub("_.*","",.data$modelA)) %>%
        mutate(methodB = sub("_.*","",.data$modelB)) %>%
        mutate(signatureA = sub(".*?_","",.data$modelA)) %>%
        mutate(signatureB = sub(".*?_","",.data$modelB)) %>%
        filter(.data$cellTypeA==.data$cellTypeB) %>%
        filter(.data$signatureA==.data$signatureB) %>%
        filter(.data$methodA!=.data$methodB) %>%
        mutate(method = .data$methodA) %>%
        mutate(signature = .data$signatureA) %>%
        mutate(cellType = .data$cellTypeA)

    # summary
    res$summary <- all %>%
        select(.data$signature,.data$cellType,.data$correlation) %>%
        group_by(.data$signature,.data$cellType) %>%
        summarize(
                mean_correlation=round(mean(.data$correlation),4)) %>%
        ungroup() %>%
        select(.data$signature,.data$cellType,.data$mean_correlation) %>%
        as.data.frame()

    # rank
    res$rank <- all %>%
        select(.data$signature,.data$method,.data$correlation) %>%
        group_by(.data$signature,.data$method,) %>%
        summarize(
                mean_correlation=round(mean(.data$correlation),4)) %>%
        ungroup() %>%
        select(.data$signature,.data$method,.data$mean_correlation) %>%
        arrange(desc(.data$mean_correlation)) %>%
        as.data.frame()

    # combinations
    res$combinations <- deconvoluted$combinations

    return(res)
}
