######## GRN graph functions ########
#' Builds a graph out of a set of connections
#' 
#' This function requires a filtered set of connections in the \code{\linkS4class{GRN}} object as generated by \code{\link{filterGRNAndConnectGenes}}
#'
#' @template GRN
#' @param model_TF_gene_nodes_separately \code{TRUE} or \code{FALSE}.  Default \code{FALSE}. Should TF and gene nodes be modeled separately? If set to \code{TRUE}, this may lead to unwanted effects in case of TF-TF connections (i.e., a TF regulating another TF)
#' @param allowLoops \code{TRUE} or \code{FALSE}.  Default \code{FALSE}. Allow loops in the network (i.e., a TF that regulates itself)
#' @param removeMultiple \code{TRUE} or \code{FALSE}.  Default \code{FALSE}. Remove multiple edges with the same start and end point? This can happen if multiple TF originate from the same gene, for example.
#' @param directed \code{TRUE} or \code{FALSE}.  Default \code{FALSE}. Should the network be directed?
#' @template forceRerun
#' @export
#' @seealso \code{\link{filterGRNAndConnectGenes}}
#' @examples 
#' # See the Workflow vignette on the GRaNIE website for examples
#' GRN = loadExampleObject()
#' GRN = build_eGRN_graph(GRN, forceRerun = FALSE)
#' @return An updated \code{\linkS4class{GRN}} object, with the graph(s) being stored in the slot `graph` (i.e., `GRN@graph` for both TF-gene and TF-peak-gene graphs)
build_eGRN_graph <- function(GRN, model_TF_gene_nodes_separately = FALSE, 
                             allowLoops = FALSE, removeMultiple = FALSE, directed = FALSE, forceRerun = FALSE) {
  
  start = Sys.time()  
  checkmate::assertClass(GRN, "GRN")
  GRN = .addFunctionLogToObject(GRN)
  
  GRN = .makeObjectCompatible(GRN)
  
  checkmate::assertFlag(model_TF_gene_nodes_separately)
  checkmate::assertFlag(allowLoops)
  checkmate::assertFlag(removeMultiple)
  checkmate::assertFlag(directed)
  checkmate::assertFlag(forceRerun)
  
  # Two edge tables (and the igraph objects derived from them) are produced:
  #  - TF-peak-gene: TF-peak and peak-gene edges stacked, each carrying its correlation r
  #  - TF-gene: TF-peak and peak-gene edges joined via the shared peak, without r
  graphsAvailable = !is.null(GRN@graph$TF_gene) && !is.null(GRN@graph$TF_peak_gene)
  
  if (graphsAvailable && !forceRerun) {
    
    .printDataAlreadyExistsMessage()
    
  } else {
    
    .checkConnections(GRN, throwError = TRUE)
    
    connections.df = GRN@connections$all.filtered[["0"]]
    
    # Which column identifies a TF node, and which one merely labels it?
    # Modeled separately: the node ID is the TF.ID, so TF nodes never coincide with gene nodes.
    # Modeled jointly (the default): the node ID is the TF's Ensembl ID, the same ID space that is used
    # for genes. A TF that is also a target gene is then one single node, which makes self-loops possible
    # and means the graph is not strictly tripartite anymore.
    if (model_TF_gene_nodes_separately) {
      tfNodeCol  = "TF.ID"
      tfLabelCol = "TF.ENSEMBL"
    } else {
      tfNodeCol  = "TF.ENSEMBL"
      tfLabelCol = "TF.ID"
    }
    
    # TF-peak edges, restricted to rows that also have a gene attached
    TF_peak.df = connections.df %>%
      dplyr::filter(!is.na(.data$gene.ENSEMBL)) %>% 
      dplyr::select(tidyselect::all_of(c(tfNodeCol, "peak.ID", tfLabelCol, "TF_peak.r"))) %>%
      stats::na.omit() %>% 
      dplyr::mutate(V2_name = NA) %>%
      unique() %>%
      dplyr::rename(V1 = tidyselect::all_of(tfNodeCol), 
                    V2 = "peak.ID", 
                    V1_name = tidyselect::all_of(tfLabelCol), 
                    r = "TF_peak.r") %>%
      dplyr::mutate(dplyr::across(c("V1", "V2"), as.vector))
    
    # Peak-gene edges
    peak_gene.df = connections.df %>%
      dplyr::select(c("peak.ID", "gene.ENSEMBL", "gene.name", "peak_gene.r")) %>%
      stats::na.omit() %>% 
      dplyr::mutate(V1_name = NA) %>%
      unique() %>%
      dplyr::rename(V1 = "peak.ID", V2 = "gene.ENSEMBL", V2_name = "gene.name", r = "peak_gene.r") %>%
      dplyr::mutate(dplyr::across(c("V1", "V2"), as.vector))
    
    # Stack both edge types; connectionType records where each row came from
    TF_peak_gene.df = list(`tf-peak` = TF_peak.df, `peak-gene` = peak_gene.df) %>%
      dplyr::bind_rows(.id = "connectionType") %>%
      dplyr::select("V1", "V2", "V1_name", "V2_name", "r", "connectionType")
    
    # Collapse TF-peak-gene paths to direct TF-gene edges by joining on the peak
    TF_gene.df = TF_peak.df %>%
      dplyr::inner_join(peak_gene.df, by = c("V2" = "V1"), multiple = "all", suffix = c(".TF_peak", ".peak_gene")) %>% 
      dplyr::select("V1", "V2.peak_gene", "V1_name.TF_peak", "V2_name.peak_gene") %>%
      dplyr::rename(V1_name = "V1_name.TF_peak", V2 = "V2.peak_gene", V2_name = "V2_name.peak_gene") %>%
      dplyr::distinct() %>%
      dplyr::mutate(connectionType = "tf-gene") 
    
    # If the graph is NOT directed, retrieving the graph structure as data frame may result in V1 and V2 switched
    # This happens when TF-TF interactions occur. The order (V1, V2) is irrelevant for an undirected graph anyway
    futile.logger::flog.info("Building TF-peak-gene graph...")
    TF_peak_gene.graph = .buildGraph(TF_peak_gene.df, 
                                     directed = directed, 
                                     allowLoops = allowLoops, 
                                     removeMultiple = removeMultiple)
    GRN@graph$TF_peak_gene = list(table = TF_peak_gene.df, graph = TF_peak_gene.graph)
    
    futile.logger::flog.info("Building TF-gene graph...")
    TF_gene.graph = .buildGraph(TF_gene.df, 
                                directed = directed, 
                                allowLoops = allowLoops, 
                                removeMultiple = removeMultiple)
    GRN@graph$TF_gene = list(table = TF_gene.df, graph = TF_gene.graph)
    
    # Remember how the graphs were built
    GRN@graph$parameters = list(directed       = directed,
                                allowLoops     = allowLoops,
                                removeMultiple = removeMultiple)
    
  }
  
  .printExecutionTime(start)
  
  GRN
  
}

# Verify that the filtered connections table exists and is non-empty.
# A missing table is always reported as an error; an empty table is reported as an error
# if throwError is TRUE and as a warning otherwise.
.checkConnections <- function(GRN, throwError = TRUE) {
    
    filteredConnections = GRN@connections$all.filtered$`0`
    
    if (is.null(filteredConnections)) {
        .checkAndLogWarningsAndErrors(NULL, 
                                      "Slot GRN@connections$all.filtered not found. Run the function filterGRNAndConnectGenes first.", 
                                      isWarning = FALSE)
    }
    
    if (nrow(filteredConnections) == 0) {
        .checkAndLogWarningsAndErrors(NULL, 
                                      "There are no connections in the filtered GRN. Make sure you run the function filterGRNAndConnectGenes and that the final eGRN has connections.", 
                                      isWarning = !throwError)
    }
    
}

# Build an igraph object from an edge table with the columns V1, V2, V1_name and V2_name (plus optional
# further columns, which become edge attributes). Vertex metadata is derived from the name columns.
# Depending on allowLoops and removeMultiple, a non-simple graph is simplified. Returns the igraph object.
.buildGraph <- function(df, directed, allowLoops, removeMultiple = FALSE, silent = FALSE) {
  
  # Metadata for every node that occurs as edge source (V1): all labels collapsed into one string
  sourceNodes = df %>%
    dplyr::select(nodeID = "V1", "V1_name") %>% 
    dplyr::distinct() %>%
    dplyr::group_by(.data$nodeID) %>%
    dplyr::summarise(names_TF_all = paste0(.data$V1_name, collapse = "|"),
                     nTF = dplyr::n(),
                     isTF = TRUE, .groups = "drop")
  
  # Metadata for every node that occurs as edge target (V2)
  targetNodes = df %>%
    dplyr::select(nodeID = "V2", names_gene = "V2_name") %>% 
    dplyr::distinct() %>%
    dplyr::mutate(isGene = TRUE) %>%
    dplyr::ungroup()
  
  # One row per node; nodes that only occur as target have no isTF value yet, set it to FALSE
  vertexMetadata = dplyr::full_join(sourceNodes, targetNodes, by = "nodeID")
  vertexMetadata$isTF = replace(vertexMetadata$isTF, is.na(vertexMetadata$isTF), FALSE)
  
  # The name columns describe vertices, not edges: drop them so that igraph does not store them as edge attributes
  edges.df = dplyr::select(df, -c("V1_name", "V2_name"))
  
  graph = igraph::graph_from_data_frame(d = edges.df, directed = directed, vertices = vertexMetadata)
  
  if (!igraph::is_simple(graph)) {
    
    if (!silent) {
      futile.logger::flog.info(" Graph contains either loops and/or multiple edges. A simplification is possible.")
    }
    
    .printLoopsGraph(df, silent = silent)
    .printMultipleEdges(df, silent = silent)
    
    simplificationRequested = removeMultiple || !allowLoops
    
    if (simplificationRequested) {
      if (!silent) {
        futile.logger::flog.info(" Simplify graph...")
      }
      graph <- igraph::simplify(graph, remove.multiple = removeMultiple, remove.loops = !allowLoops)
    } else if (!silent) {
      futile.logger::flog.info(" Not doing any graph simplification, see the parameters removeMultiple and allowLoops to change it.")
    }
    
  }
  
  .printGraphSummary(graph, silent = silent)
  
  if (!silent) {
    futile.logger::flog.info(" Done. Graphs are saved in GRN@graph")
  }
  
  graph
}

# Log all nodes of an edge table that have an edge to themselves (V1 equals V2). Nothing is logged if silent is TRUE.
.printLoopsGraph <- function(graph_table, silent = FALSE) {
  
  selfEdges = dplyr::filter(graph_table, .data$V1 == .data$V2)
  
  # Label each looping node as "ID (name)"
  loopLabels = paste0(selfEdges$V1, " (", selfEdges$V1_name, ")")
  
  if (nrow(selfEdges) > 0 && !silent) {
    futile.logger::flog.info(paste0(" The following nodes / vertices have loop edges (TF regulating itself):\n", 
                                    paste0(loopLabels, collapse = ", ")))
  }
  
}

# Log how many distinct (V1, V2) pairs occur more than once in an edge table. Nothing is logged if silent is TRUE.
.printMultipleEdges <- function(graph_table, silent = FALSE) {
  
  # Number of rows per vertex pair, keeping only the pairs that are present repeatedly
  repeatedPairs = graph_table %>%
    dplyr::count(.data$V1, .data$V2, name = "n") %>%
    dplyr::filter(.data$n > 1)
  
  nRepeated = nrow(repeatedPairs)
  
  if (nRepeated > 0 && !silent) {
    futile.logger::flog.info(paste0(" ", nRepeated, " edges have the same vertices. This is often caused by multiple TF belonging to the same gene ID."))
  }
  
}


# Log the number of vertices and edges of an igraph object. Nothing is logged if silent is TRUE.
.printGraphSummary <- function(graph, silent = FALSE) {
  
  # Counted regardless of silent so that an invalid graph object is noticed in any case
  vertexCount = length(igraph::V(graph))
  edgeCount   = length(igraph::E(graph))
  
  if (!silent) {
    futile.logger::flog.info(" Graph summary:")
    futile.logger::flog.info(paste0("  Nodes (vertices): ", vertexCount))
    futile.logger::flog.info(paste0("  Edges: ", edgeCount))
  }
  
}



#' Filter connections for subsequent visualization with `visualizeGRN()` from the filtered eGRN
#' 
#' This helper function provides an easy and flexible way to retain particular connections for plotting and discard all others. Note that this filtering is only
#' relevant and applicable for the function `visualizeGRN()` and ignored anywhere else. This makes it possible to visualize only specific TF regulons or to plot only
#' connections that fulfill particular filter criteria. Due to the flexibility of the implementation by allowing arbitrary filters that are passed directly to
#' \code{dplyr::filter}, users can visually investigate the eGRN, which is particularly useful when the eGRN is large and has many connections.
#'
#' @template GRN
#' @param plotAll \code{TRUE} or \code{FALSE}. Default \code{TRUE}. Should all connections be included for plotting? 
#' If set to \code{TRUE}, all connections are marked for plotting and everything else is ignored. This resets any previous setting.
#' If set \code{FALSE}, the filter expressions (if any) are used to determine which connection to plot
#' @param ... An arbitrary set of arguments that is used directly, without modification, as input for dplyr::filter and therefore has to be valid expression that dplyr::filter understands.
#' The filtering is based on the \code{all.filtered} table as stored in \code{GRN@connections$all.filtered$`0`}. Thus, the specific filters can be completely
#' arbitrary for ultimate flexibility and must only adhere to the column names and types as defined in \code{GRN@connections$all.filtered$`0`}. See the examples also for what you can do. 
#' @template forceRerun
#' @export
#' @examples 
#' # See the Workflow vignette on the GRaNIE website for examples
#' GRN = loadExampleObject()
#' GRN = filterConnectionsForPlotting (GRN, plotAll = FALSE, TF.ID == "E2F6.0.A")
#' GRN = filterConnectionsForPlotting (GRN, plotAll = FALSE, TF_peak.r > 0.7 | TF_peak.fdr < 0.2)
#' GRN = filterConnectionsForPlotting (GRN, plotAll = FALSE, TF_peak.r > 0.7, TF_peak.fdr < 0.2)
#' @return An updated \code{\linkS4class{GRN}} object, with added data from this function.
filterConnectionsForPlotting <- function(GRN, 
                                         plotAll = TRUE,
                                         ..., 
                                         forceRerun = FALSE) {
    
    start = Sys.time()
    
    checkmate::assertClass(GRN, "GRN")
    GRN = .addFunctionLogToObject(GRN)
    
    GRN = .makeObjectCompatible(GRN)
    
    checkmate::assertFlag(plotAll)
    checkmate::assertFlag(forceRerun)
    
    # The flags are written into the graph tables, so the graphs have to exist already. Without this guard,
    # the assignments below would silently create stub tables in an object without graphs.
    # This is the same guard that calculateGeneralEnrichment uses.
    .checkGraphExistance(GRN)
    
    flagsPresent = "includeForPlotting" %in% colnames(GRN@graph$TF_peak_gene$table) &&
                   "includeForPlotting" %in% colnames(GRN@graph$TF_gene$table)
    
    # Note that once the flags are present, they are only recalculated if forceRerun = TRUE;
    # otherwise, plotAll and any filter expressions are ignored.
    if (forceRerun || !flagsPresent) {
        
        # ...length() gives the number of filter expressions without evaluating them
        if (plotAll || ...length() == 0) {
            
            GRN@graph$TF_gene$table$includeForPlotting = TRUE
            GRN@graph$TF_peak_gene$table$includeForPlotting = TRUE
            futile.logger::flog.info(paste0(" Include all connections for GRN visualization"))
            
        } else {
            
            futile.logger::flog.info(paste0(" Filter connections for GRN visualization"))
            
            # The filter expressions are applied to the filtered connections table, not to the graph tables
            connectionsRetained = dplyr::filter(GRN@connections$all.filtered$`0`, ...)
            
            futile.logger::flog.info(paste0(" Keep connections for a total of ", nrow(connectionsRetained), " connections"))
            
            if (nrow(connectionsRetained) == 0) {
                message = paste0("filterConnectionsForPlotting: No connections are left for plotting. Make sure this was intended.")
                .checkAndLogWarningsAndErrors(NULL, message, isWarning = TRUE)
            }
            
            # Edges are identified by the key "<V1>_<V2>" and matched against the keys of the retained connections.
            # TF nodes are named by their Ensembl ID if the graph was built with model_TF_gene_nodes_separately = FALSE
            # and by their TF.ID otherwise (see build_eGRN_graph). Keys for both naming schemes are generated so that
            # the filtering works for either graph type.
            TF_gene.keys = unique(c(paste0(connectionsRetained$TF.ENSEMBL, "_", connectionsRetained$gene.ENSEMBL),
                                    paste0(connectionsRetained$TF.ID,      "_", connectionsRetained$gene.ENSEMBL)))
            
            TF_peak.keys = unique(c(paste0(connectionsRetained$TF.ENSEMBL, "_", connectionsRetained$peak.ID),
                                    paste0(connectionsRetained$TF.ID,      "_", connectionsRetained$peak.ID)))
            
            peak_gene.keys = unique(paste0(connectionsRetained$peak.ID, "_", connectionsRetained$gene.ENSEMBL))
            
            # TF-gene graph: direct TF-gene edges only
            TF_gene.edgeKeys = paste0(GRN@graph$TF_gene$table$V1, "_", GRN@graph$TF_gene$table$V2)
            GRN@graph$TF_gene$table$includeForPlotting = TF_gene.edgeKeys %in% TF_gene.keys
            
            # TF-peak-gene graph: the table contains TF-peak and peak-gene edges, so both key sets are needed.
            # TF-gene keys cannot match here because no edge in this table connects a TF directly to a gene.
            TF_peak_gene.edgeKeys = paste0(GRN@graph$TF_peak_gene$table$V1, "_", GRN@graph$TF_peak_gene$table$V2)
            GRN@graph$TF_peak_gene$table$includeForPlotting = TF_peak_gene.edgeKeys %in% c(TF_peak.keys, peak_gene.keys)
            
        }
        
    } else {
        .printDataAlreadyExistsMessage()
    }
    
    .printExecutionTime(start)
    
    GRN
    
}

#' Perform all network-related statistical and descriptive analyses, including community and enrichment analyses. See the functions it executes in the @seealso section below.
#' 
#' A convenience function that calls all network-related functions in one-go, using selected default parameters and a set of adjustable ones also. 
#' For full adjustment, run the individual functions separately. 
#' This function requires a filtered set of connections in the \code{\linkS4class{GRN}} object as generated by \code{\link{filterGRNAndConnectGenes}}
#'
#' @inheritParams calculateGeneralEnrichment
#' @inheritParams plotCommunitiesStats
#' @inheritParams plotCommunitiesEnrichment
#' @inheritParams calculateCommunitiesStats
#' @template maxWidth_nchar_plot
#' @seealso \code{\link{build_eGRN_graph}}
#' @seealso \code{\link{plotGeneralGraphStats}} 
#' @seealso \code{\link{calculateGeneralEnrichment}} 
#' @seealso \code{\link{plotGeneralEnrichment}} 
#' @seealso \code{\link{calculateCommunitiesStats}} 
#' @seealso \code{\link{plotCommunitiesStats}} 
#' @seealso \code{\link{calculateCommunitiesEnrichment}} 
#' @seealso \code{\link{plotCommunitiesEnrichment}} 
#' @seealso \code{\link{calculateTFEnrichment}} 
#' @seealso \code{\link{plotTFEnrichment}} 
#' @export
#' @examples 
#' # See the Workflow vignette on the GRaNIE website for examples
#' # GRN = loadExampleObject()
#' # GRN = performAllNetworkAnalyses(GRN, outputFolder = ".", forceRerun = FALSE)
#' @return An updated \code{\linkS4class{GRN}} object, with added data from this function.
# Runs the network analysis functions one after the other on the same GRN object: graph construction,
# general graph statistics, general enrichment, community statistics and enrichment, and TF enrichment.
# Each step receives the GRN object returned by the previous step, so the order of the calls matters.
performAllNetworkAnalyses <- function(GRN, ontology = c("GO_BP", "GO_MF"), 
                                      algorithm = "weight01", statistic = "fisher",
                                      background = "neighborhood", 
                                      clustering = "louvain",
                                      communities = NULL, selection = "byRank",
                                      topnGenes = 20, topnTFs = 20,
                                      maxWidth_nchar_plot = 50,
                                      display_pAdj = FALSE,
                                      outputFolder = NULL,
                                      forceRerun = FALSE) {
  
  start = Sys.time()
  
  checkmate::assertClass(GRN, "GRN")
  GRN = .addFunctionLogToObject(GRN)
  
  GRN = .makeObjectCompatible(GRN)
  
  # The graph is built with fixed settings (joint TF/gene nodes, undirected, no loops, multiple edges kept).
  # NOTE(review): forceRerun is not forwarded here, so an already existing graph is reused even if
  # forceRerun = TRUE, while all subsequent steps are recalculated. Confirm that this is intended.
  GRN = build_eGRN_graph(GRN, model_TF_gene_nodes_separately = FALSE, allowLoops = FALSE, directed = FALSE, removeMultiple = FALSE)
  
  # General graph statistics
  GRN = plotGeneralGraphStats(GRN, outputFolder = outputFolder, forceRerun = forceRerun) 
  
  # Enrichment for the whole network (pAdjustMethod is not passed, the default of the function applies)
  GRN = calculateGeneralEnrichment(GRN, ontology = ontology, algorithm = algorithm, statistic = statistic, 
                                   background = background, forceRerun = forceRerun)
  GRN = plotGeneralEnrichment(GRN, outputFolder = outputFolder, display_pAdj = display_pAdj, 
                              maxWidth_nchar_plot = maxWidth_nchar_plot, forceRerun = forceRerun) 
  
  
  # Community detection and statistics
  GRN = calculateCommunitiesStats(GRN, clustering = clustering, forceRerun = forceRerun)
  
  GRN = plotCommunitiesStats(GRN, outputFolder = outputFolder, selection = selection, communities = communities, 
                                  forceRerun = forceRerun, topnGenes = topnGenes, topnTFs = topnTFs)
  
  # Enrichment per community
  GRN = calculateCommunitiesEnrichment(GRN, ontology = ontology, algorithm = algorithm, statistic = statistic, 
                                       selection = selection, communities = communities,
                                       background = background, forceRerun = forceRerun)
  
  GRN = plotCommunitiesEnrichment(GRN, outputFolder = outputFolder, 
                                  selection = selection, communities = communities,
                                  display_pAdj = display_pAdj,  maxWidth_nchar_plot = maxWidth_nchar_plot,
                                  forceRerun = forceRerun)
  
  # Enrichment per TF; the p-value adjustment method is fixed to "BH" here
  GRN = calculateTFEnrichment(GRN, ontology = ontology, algorithm = algorithm, statistic = statistic,
                              background = background, pAdjustMethod = "BH",
                              forceRerun = forceRerun)
  
  GRN = plotTFEnrichment(GRN, display_pAdj = display_pAdj, outputFolder = outputFolder, maxWidth_nchar_plot = maxWidth_nchar_plot,
                         forceRerun = forceRerun)
  
  
  .printExecutionTime(start)
  
  GRN
  
}



# Retrieve set of background genes (as vector) used for enrichment analyses from a GRN object
# Retrieve the set of background genes (as vector of Ensembl IDs) used for enrichment analyses from a GRN object
#
# type selects the source of the background:
#  - "all_annotated":    all genes from the gene annotation
#  - "all_RNA":          all IDs from the RNA counts metadata
#  - "all_RNA_filtered": as all_RNA, but without the IDs that are marked as filtered
#  - "neighborhood":     the factor levels of the gene column in the peak-gene connections
# gene.types is a character vector of gene types to keep. If it contains the keyword "all", 
# no filtering by gene type is done.
.getBackgroundGenes <- function(GRN, type = "neighborhood", gene.types = "all") {
  
  checkmate::assertChoice(type, c("all_annotated", "all_RNA", "all_RNA_filtered", "neighborhood"))
  
  if (type == "all_annotated") {
    
    backgroundGenes = GRN@annotation$genes$gene.ENSEMBL
    
  } else if (type == "all_RNA") {
    
    backgroundGenes = GRN@data$RNA$counts_metadata %>%
      dplyr::pull(.data$ID)
    
  } else if (type == "all_RNA_filtered") {
    
    backgroundGenes = GRN@data$RNA$counts_metadata %>%
      dplyr::filter(!.data$isFiltered) %>%
      dplyr::pull(.data$ID)
    
  } else if (type == "neighborhood") {
    
    # Retrieve only those who are in the neighborhood of genes.
    # levels() is used, so this relies on gene.ENSEMBL being a factor in the peak-gene table
    backgroundGenes = levels(GRN@connections$peak_genes[["0"]]$gene.ENSEMBL)
  }
  
  # Filter genes by gene type. gene.types may contain more than one type, so a comparison with != 
  # would yield a condition of length > 1, which is not allowed in if(); %in% always gives a single value here.
  if (!"all" %in% gene.types) {
    backgroundGenes = dplyr::filter(GRN@annotation$genes, 
                                    .data$gene.ENSEMBL %in% backgroundGenes,
                                    .data$gene.type %in% gene.types) %>%
      dplyr::pull(.data$gene.ENSEMBL)
  }
  
  backgroundGenes 
  
}



#' Run an enrichment analysis for the genes in the whole network in the filtered \code{\linkS4class{GRN}} object
#' 
#' The enrichment analysis is based on the whole network, see \code{\link{calculateCommunitiesEnrichment}} and \code{\link{calculateTFEnrichment}} for 
#' community- and TF-specific enrichment, respectively.
#' This function requires the existence of the eGRN graph in the \code{\linkS4class{GRN}} object as produced by \code{\link{build_eGRN_graph}}.
#' Results can subsequently be visualized with the function \code{\link{plotGeneralEnrichment}}.
#' 
#' All enrichment functions use the TF-gene graph as defined in the `GRN` object. See the `ontology` argument for currently supported ontologies.
#' Also note that some parameter combinations for `algorithm` and `statistic` are incompatible, an error message will be thrown in such a case.
#' 
#' @template GRN
#' @param ontology Character vector of ontologies. Default \code{c("GO_BP", "GO_MF")}. 
#' Valid values are \code{"GO_BP"}, \code{"GO_MF"}, \code{"GO_CC"}, \code{"KEGG"}, \code{"DO"}, and \code{"Reactome"}, 
#' referring to \emph{GO Biological Process}, \emph{GO Molecular Function}, \emph{GO Cellular Component}, \emph{KEGG}, \emph{Disease Ontology}, 
#' and \emph{Reactome Pathways}, respectively. \code{GO} ontologies require the \code{topGO}, 
#' \code{"KEGG"} the \code{clusterProfiler}, \code{"DO"} the \code{DOSE}, and \code{"Reactome"} the \code{ReactomePA} packages, respectively.
#' As they are listed under \code{Suggests}, they may not yet be installed, and the function will throw an error if they are missing.
#' @param algorithm Character. Default \code{"weight01"}. One of: \code{"classic"}, \code{"elim"}, \code{"weight"}, \code{"weight01"}, \code{"lea"}, \code{"parentchild"}. Only relevant if ontology is GO related (GO_BP, GO_MF, GO_CC), ignored otherwise. Name of the algorithm that handles the GO graph structures. Valid inputs are those supported by the \code{topGO} library. 
#' For general information about the algorithms, see \url{https://academic.oup.com/bioinformatics/article/22/13/1600/193669}. \code{weight01} is a mixture between the \code{elim} and the \code{weight} algorithms.
#' @param statistic Character. Default \code{"fisher"}. One of: \code{"fisher"}, \code{"ks"}, \code{"t"}. Statistical test to be used. Only relevant if ontology is GO related (\code{GO_BP}, \code{GO_MF}, \code{GO_CC}), and valid inputs are a subset of those supported by the \code{topGO} library (we had to remove some as they do not seem to work properly in \code{topGO} either), ignored otherwise. For the other ontologies the test statistic is always Fisher. 
#' @param background Character. Default \code{"neighborhood"}. One of: \code{"all_annotated"}, \code{"all_RNA"}, \code{"all_RNA_filtered"}, \code{"neighborhood"}. Set of genes to be used to construct the background for the enrichment analysis. This can either be all annotated genes in the reference genome (\code{all_annotated}), all genes from the provided RNA data (\code{all_RNA}), all genes from the provided RNA data excluding those marked as filtered after executing \code{filterData} (\code{all_RNA_filtered}), or all the genes that are within the neighborhood of any peak (before applying any filters except for the user-defined \code{promoterRange} value in \code{addConnections_peak_gene}) (\code{neighborhood}).
#' @param background_geneTypes Character vector of gene types that should be considered for the background. Default \code{"all"}. 
#' Only gene types as defined in the \code{\linkS4class{GRN}} object, slot \code{GRN@annotation$genes$gene.type} are allowed. 
#' The special keyword \code{"all"} means no filter on gene type.
#' @param pAdjustMethod Character. Default \code{"BH"}. One of: \code{"holm"}, \code{"hochberg"}, \code{"hommel"}, \code{"bonferroni"}, \code{"BH"}, \code{"BY"}, \code{"fdr"}, \code{"none"}. This parameter is only relevant for the following ontologies: KEGG, DO, Reactome. For the other ontologies, the algorithm serves as an adjustment.
#' @template forceRerun
#' @return An updated \code{\linkS4class{GRN}} object, with the enrichment results stored in the \code{stats$Enrichment$general} slot.
#' @seealso \code{\link{plotGeneralEnrichment}}
#' @seealso \code{\link{calculateCommunitiesEnrichment}}
#' @seealso \code{\link{calculateTFEnrichment}}
#' @seealso \code{\link{plotCommunitiesEnrichment}}
#' @examples 
#' # See the Workflow vignette on the GRaNIE website for examples
#' GRN =  loadExampleObject()
#' GRN =  calculateGeneralEnrichment(GRN, ontology = "GO_BP", forceRerun = FALSE)
#' @export
calculateGeneralEnrichment <- function(GRN, ontology = c("GO_BP", "GO_MF"), 
                                       algorithm = "weight01", statistic = "fisher",
                                       background = "neighborhood",  background_geneTypes = "all",
                                       pAdjustMethod = "BH", forceRerun = FALSE) {
  
  start = Sys.time()
  checkmate::assertClass(GRN, "GRN")
  GRN = .addFunctionLogToObject(GRN)
  
  GRN = .makeObjectCompatible(GRN)
  
  # Argument validation
  checkmate::assertSubset(ontology , c("GO_BP", "GO_MF", "GO_CC", "KEGG", "DO", "Reactome"), empty.ok = FALSE)
  
  .checkPackage_topGO_and_arguments(ontology, algorithm, statistic)
  
  checkmate::assertChoice(background, c("all_annotated", "all_RNA", "all_RNA_filtered", "neighborhood"))
  
  # Allowed gene types: the keyword "all" plus every gene type that occurs in the annotation
  allowedGeneTypes = stats::na.omit(c("all", unique(as.character(GRN@annotation$genes$gene.type))))
  checkmate::assertSubset(background_geneTypes, allowedGeneTypes, empty.ok = FALSE)
  
  checkmate::assertChoice(pAdjustMethod, c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none"))
  checkmate::assertFlag(forceRerun)
  
  # Both the filtered connections and the graph are prerequisites
  .checkConnections(GRN, throwError = TRUE)
  .checkGraphExistance(GRN)
  
  futile.logger::flog.info("Calculating general enrichment. This may take a while")
  
  mapping = .getGenomeObject(GRN@config$parameters$genomeAssembly, type = "packageName")
  backgroundGenes = .getBackgroundGenes(GRN, type = background, gene.types = background_geneTypes)
  
  if (is.null(GRN@stats[["Enrichment"]][["general"]])) {
    GRN@stats[["Enrichment"]][["general"]] = list()
  }
  
  # One enrichment run per ontology; results that exist already are only recalculated if forceRerun is TRUE
  for (ontologyCur in ontology) {
    
    resultExists = !is.null(GRN@stats[["Enrichment"]][["general"]][[ontologyCur]])
    
    if (resultExists && !forceRerun) {
      .printDataAlreadyExistsMessage()
      next
    }
    
    # Only use the "targets", i.e., genes as foreground because it would artificially enrich for TF terms, 
    # such as "DNA-binding" "transcription activation" type terms.
    targetGenes = GRN@graph$TF_gene$table$V2
    
    GRN@stats[["Enrichment"]][["general"]][[ontologyCur]] =
      .runEnrichment(GRN,
                     foreground = targetGenes, 
                     background = backgroundGenes,
                     backgroundStr = background, 
                     ontology = ontologyCur, 
                     algorithm = algorithm, 
                     statistic = statistic,
                     mapping = mapping,
                     pAdjustMethod = pAdjustMethod)
    
    futile.logger::flog.info(paste0("Result stored in GRN@stats$Enrichment$general$", ontologyCur, "$results"))
    
  }
  
  .printExecutionTime(start, prefix = "")
  
  GRN
}


# Combine the group-specific enrichment results (per community or per TF) with the general enrichment
# results for one ontology into a single table.
#
# type:          "byCommunity" or "byTF", the element of GRN@stats$Enrichment to take the group results from
# ontology:      name of the ontology to combine
# p:             p-value threshold, rows with a larger p-value are discarded
# nSignificant:  minimum value of the column Found for a term to count as enriched
# display_pAdj:  for non-GO ontologies, use the adjusted instead of the raw p-value for the group results
#
# Returns a tibble with one row per group and term. The grouping column (community or TF.ID) and ID are 
# factors; the general enrichment results are included with the group label "all".
.combineEnrichmentResults <- function(GRN, type, ontology, p, nSignificant, display_pAdj) {
  
  # Any other value would leave the name of the grouping column undefined
  checkmate::assertChoice(type, c("byCommunity", "byTF"))
  idMerge = if (type == "byCommunity") "community" else "TF.ID"
  
  # p-values may be stored as text with a leading "<" or ">" (such as "< 1e-30"). The prefix has to be removed
  # BEFORE the conversion to numeric, as the value would otherwise become NA and the term would be lost 
  # in the filtering steps below.
  parsePval = function(x) {
    as.numeric(gsub(">|<", "", x))
  }
  
  # Merge all group-specific results to one data frame
  # suppressWarnings is kept from the previous implementation to not introduce new warnings for callers
  resultsCombined.df = suppressWarnings(GRN@stats[["Enrichment"]][[type]] %>%
                                          lapply(function(x) {x[[ontology]]$results}) %>%
                                          dplyr::bind_rows(.id = idMerge) %>%
                                          dplyr::select(-tidyselect::starts_with("topG")) %>%
                                          dplyr::mutate(pval = parsePval(.data$pval)) %>%
                                          tibble::as_tibble())
  
  # p-adjust only available for non-GO ontologies
  # NOTE(review): only the group-specific results are switched to the adjusted p-value, the general 
  # enrichment results added below keep the raw p-value. Confirm that this is intended.
  if (display_pAdj && !stringr::str_starts(ontology, "GO_")) {
    resultsCombined.df$pval = resultsCombined.df$p.adjust
  }
  
  # Add general enrichment
  
  if (is.null(GRN@stats$Enrichment$general[[ontology]]$results)) {
    message = paste0("Could not find enrichment results for general enrichment for ontology ", ontology, ". Please (re)run the function calculateGeneralEnrichment for the ontology ", ontology)
    .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
  }
  
  # Same columns as the group-specific table, and p-values converted to numeric so that the comparisons
  # with the threshold below are numeric comparisons
  enrichmentGeneral = GRN@stats$Enrichment$general[[ontology]]$results %>%
    dplyr::mutate({{idMerge}} := "all") %>%
    dplyr::select(tidyselect::all_of(colnames(resultsCombined.df))) %>%
    dplyr::mutate(pval = parsePval(.data$pval))
  
  # get enriched terms from general enrichment and make sure it is kept for the community enrichment
  enrichedTermsGeneral = enrichmentGeneral %>% 
    dplyr::filter(.data$pval <= p, .data$Found >= nSignificant) %>% 
    dplyr::pull(.data$ID)
  
  enrichedTermsGrouped = resultsCombined.df %>% 
    dplyr::filter(.data$pval <= p, .data$Found >= nSignificant) %>% 
    dplyr::pull(.data$ID)
  
  # Keep rows below the p-value threshold that either have enough genes themselves or whose term 
  # is enriched in the general results or in any of the groups
  all.df = resultsCombined.df %>%
    rbind(enrichmentGeneral) %>%
    dplyr::mutate(ID = as.factor(.data$ID)) %>%
    dplyr::filter(.data$pval <= p & (.data$Found >= nSignificant | .data$ID %in% c(enrichedTermsGeneral, enrichedTermsGrouped)))
  
  all.df[[idMerge]] = as.factor(all.df[[idMerge]])
  
  all.df
  
}

# Compare the ontologies for which general enrichment results exist with those of the first element of the
# community or TF enrichment results, and log a warning if they differ.
# Returns a list with two elements: the ontologies of the group (named after type) and of the general enrichment ("all").
.checkEnrichmentCongruence_general <- function(GRN, type = "community") {
  
  ontologiesGeneral = sort(names(GRN@stats$Enrichment$general))
  
  # Only the first community / TF is inspected
  if (type == "community") {
    firstGroupResults = GRN@stats$Enrichment$byCommunity[[1]]
  } else if (type == "TF") {
    firstGroupResults = GRN@stats$Enrichment$byTF[[1]]
  }
  ontologiesGroup = sort(names(firstGroupResults))
  
  if (!identical(ontologiesGeneral, ontologiesGroup)) {
    
    ontologiesGeneral.str = paste0(ontologiesGeneral, collapse = " & ")
    ontologiesGroup.str   = paste0(ontologiesGroup, collapse = " & ")
    
    message = paste0(".checkEnrichmentCongruence_general: General enrichment and ", type, 
                     " enrichment do not have the same ontologies precalculated (\"",
                     ontologiesGeneral.str, "\" vs. \"", ontologiesGroup.str, "\"). ",
                     "Rerun one of the enrichment functions (general, community, or TF) and add the missing ontology")
    .checkAndLogWarningsAndErrors(NULL, message, isWarning = TRUE)
  }
  
  stats::setNames(list(ontologiesGroup, ontologiesGeneral), c(type, "all"))
  
}



# Runs the enrichment analysis for exactly one ontology.
#
# Arguments:
#   GRN            Object from which the genome assembly (config$parameters$genomeAssembly) and the
#                  gene annotation (annotation$genes) are read.
#   foreground     Foreground gene IDs (coerced to character and made unique).
#   background     Background gene IDs (coerced to character and made unique).
#   backgroundStr  Label of the background, used for logging and stored in the parameter list.
#   ontology       Single string. "GO_BP", "GO_MF" and "GO_CC" are handled via topGO;
#                  "KEGG", "Reactome" and "DO" via clusterProfiler, ReactomePA and DOSE, respectively.
#   description    Passed on as description of the topGOdata object.
#   algorithm, statistic  Passed on to topGO::runTest. For KEGG, DO and Reactome, statistic is forced to "fisher".
#   mapping        Passed on as mapping argument when creating the topGOdata object.
#   pAdjustMethod, minGSSize, maxGSSize  Passed on to the KEGG / Reactome / DO enrichment functions.
#
# Returns a list with the elements "results" (enrichment table) and "parameters" (settings used),
# or NULL if the Ensembl-to-Entrez ID mapping via biomaRt failed (the ontology is skipped in that case).
#' @importFrom biomaRt useEnsembl getBM
.runEnrichment <- function(GRN, foreground, background, backgroundStr, ontology, 
                           description = "Enrichment Analysis",
                           algorithm="weight01", statistic = "fisher", mapping, pAdjustMethod = "BH", minGSSize = 0, maxGSSize = 5000) {
  
  
  result.list = list()
  # Implementation change: Allow only one ontology term here, and force the calling function to handle multiple ontologies.
  # Advantage: Prevents recalculation if enrichment for ontology has already been calculated
  checkmate::assertCharacter(ontology, len = 1)
  
  
  foreground = as.character(foreground) %>% unique()
  background = as.character(background) %>% unique()
  
  nForeground = length(foreground)
  nBackground = length(background)
  
  
  if (ontology %in% c("KEGG", "DO", "Reactome")) {
    # the ENSEMBL IDs will need to be mapped to Entrez IDs for these ontologies
    
    if (statistic != "fisher") {
      statistic = "fisher"
      message = paste0(".runEnrichment: For KEGG, DO and Reacome enrichment, the parameter \"statistic\" can only be \"fisher\". It has been changed accordingly.")
      .checkAndLogWarningsAndErrors(NULL, message, isWarning = TRUE)
    }

    params.l = .getBiomartParameters(GRN@config$parameters$genomeAssembly)
    
    # Translates ENSEMBL IDs to gene names via the annotation and queries biomaRt for the Entrez IDs
    mapToEntrez = function(mart, ensemblIDs) {
      geneNames = GRN@annotation$genes$gene.name[match(ensemblIDs, GRN@annotation$genes$gene.ENSEMBL)]
      biomaRt::getBM(mart = mart,
                     attributes = "entrezgene_id",
                     filters = "external_gene_name",
                     values = geneNames)[,1] %>%
        stats::na.omit() %>% as.character()
    }
    
    # All biomaRt calls share one tryCatch that yields NULL on failure. Setting a flag inside
    # an error handler would only modify a variable local to the handler function, so the
    # failure would go unnoticed and the handler's return value would be used as the result.
    entrez.l = tryCatch({ 
      ensembl = biomaRt::useEnsembl(biomart = "genes", host = params.l[["host"]], dataset = params.l[["dataset"]])
      
      list(foreground = mapToEntrez(ensembl, foreground),
           background = mapToEntrez(ensembl, background))
      
    }, error = function(e) {
      NULL
    }
    )
    
    if (is.null(entrez.l)) {
      
      error_Biomart = ".runEnrichment: A temporary error occured with biomaRt::getBM or biomaRt::useEnsembl. This is often caused by an unresponsive Ensembl site and may be caused by the ontology type (e.g, it may work for the GO ontologies but not for KEGG). Try again at a later time or change ontologies. For now, this ontology has been skipped. Note that this error is not caused by GRaNIE but external services."
      .checkAndLogWarningsAndErrors(NULL, error_Biomart, isWarning = TRUE)
      return(NULL)
      
    }
    
    foreground_entrez = entrez.l[["foreground"]]
    background_entrez = entrez.l[["background"]]
    
  }

  
  # Two-level factor over the background genes: 1 = gene is part of the foreground, 0 = it is not
  geneList = factor(as.integer(background %in% foreground))
  names(geneList) = background
  
  # Catch cases where none of the foreground genes are in the background
  if (nlevels(geneList) < 2) {
      message_noOverlap = ".runEnrichment: None of the foreground genes are part of the background. This may happen, for example, if the background is filtered by the gene type (background_geneTypes). Thus, no enrichment can be calculated."
      .checkAndLogWarningsAndErrors(NULL, message_noOverlap, isWarning = TRUE)
      
      # Empty placeholder table; for KEGG, Reactome and DO it is replaced further below
      result.list[["results"]] = tibble::tribble(~ID, ~Term, ~Annotated, ~Found, ~Expected, ~pval, ~GeneRatio, ~gene.ENSEMBL_foreground)
  } else {
      
      futile.logger::flog.info(paste0("   Running enrichment analysis for ontology ", ontology, " using ", nForeground, " and ", nBackground, " genes as foreground and background (", backgroundStr, "), respectively. This may take a while."))
      
      
      if (ontology %in% c("GO_BP","GO_MF","GO_CC")) {
          
          # https://support.bioconductor.org/p/9141171/
          # The need of p-value adjustment: https://bioconductor.org/packages/devel/bioc/vignettes/topGO/inst/doc/topGO.pdf
          
          go_enrichment = suppressMessages(new("topGOdata",
                                               ontology = gsub("GO_", "", ontology),
                                               allGenes = geneList,
                                               description = description,
                                               nodeSize = 5,
                                               annot = topGO::annFUN.org,
                                               mapping = mapping, 
                                               ID = "ensembl"))
          
          # retrieve genes2GO list from the "expanded" annotation in GOdata
          allGO = topGO::genesInTerm(go_enrichment)
          allGO_inForeground = lapply(allGO, function(x) paste0(x[x %in% foreground], collapse = ",")) %>%
              as.data.frame() %>% t() 
          
          # as.data.frame turns the ":" of the GO IDs into ".", which is reverted here
          allGO_inForeground.df = tibble::tibble(ID = rownames(allGO_inForeground), gene.ENSEMBL_foreground = allGO_inForeground[, 1]) %>%
              dplyr::mutate(ID = sub(".", ":", .data$ID, fixed = TRUE))
          
          
          
          result = suppressMessages(topGO::runTest(go_enrichment, algorithm = algorithm, statistic = statistic))
          # Dont trim GO terms here, happens later when plotting
          result.tbl = unique(topGO::GenTable(go_enrichment, pval = result, orderBy = "pval", numChar = 1000, 
                                              topNodes = length(topGO::score(result))) ) %>%
              dplyr::rename(ID = "GO.ID", Found = "Significant")  %>%      # make it more clear what Significant refers to here
              dplyr::mutate(GeneRatio = .data$Found / nForeground) %>%
              dplyr::left_join(allGO_inForeground.df, by = "ID")
          
          
          result.list[["results"]] = result.tbl

      }
  }
  
  
  # Shared error message for different ontologies
  enrichmentErrorMessage = ".runEnrichment: Could not calculate enrichment, the server returned an error. This may happen for multiple reasons, for example if no gene can be mapped. The results will be set to NA."
  
  if (ontology == "KEGG") {
    
     packageMessage = paste0("The package clusterProfiler is not installed, which is however needed for the chosen ontology enrichment. Please install it and re-run this function or change the ontology.")
    .checkPackageInstallation("clusterProfiler", packageMessage)
    
    # grepl is required here: grep returns integer(0) if the pattern does not match, 
    # which makes if() fail with "argument is of length zero"
    genomeAssembly = GRN@config$parameters$genomeAssembly
    if (isTRUE(grepl(x = genomeAssembly, pattern = "^hg\\d\\d"))) {
      org = "hsa"
    } else if (isTRUE(grepl(x = genomeAssembly, pattern = "^mm\\d\\d"))) {
      org = "mmu"
    } else {
      org = NULL
    }
    
    if (is.null(org)) {
      
      message = paste0(".runEnrichment: This function only maps human (hg*) and mouse (mm*) genome assemblies to a KEGG organism, but the genome assembly is \"", genomeAssembly, "\". The KEGG results will be empty.")
      .checkAndLogWarningsAndErrors(NULL, message, isWarning = TRUE)
      kegg_enrichment = NULL
      
    } else {
      
      # Both errors and warnings abort the call; the handlers explicitly return NULL so that an empty table is created
      kegg_enrichment = tryCatch({ 
        clusterProfiler::enrichKEGG(
          gene = foreground_entrez,
          universe = background_entrez,
          keyType = "ncbi-geneid",
          organism = org,
          pvalueCutoff = 1,
          qvalueCutoff = 1,
          minGSSize = minGSSize,
          maxGSSize = maxGSSize,
          pAdjustMethod = pAdjustMethod)
        
      }, error = function(e) {
        message = enrichmentErrorMessage
        .checkAndLogWarningsAndErrors(NULL, message, isWarning = TRUE)
        NULL
      }, warning = function(w) {
        message = enrichmentErrorMessage
        .checkAndLogWarningsAndErrors(NULL, message, isWarning = TRUE)
        NULL
      }
      )
      
    }
    
    result.list[["results"]] = .createEnrichmentTable(kegg_enrichment)
    
  }
  
  if (ontology == "Reactome") {
    
    packageMessage = paste0("The package ReactomePA is not installed, which is however needed for the chosen ontology enrichment. Please install it and re-run this function or change the ontology.")
    .checkPackageInstallation("ReactomePA", packageMessage)
    
    reactome_enrichment = tryCatch({ 
      ReactomePA::enrichPathway(
        gene = foreground_entrez,
        universe = background_entrez,
        pvalueCutoff = 1,
        qvalueCutoff = 1,
        minGSSize = minGSSize,
        maxGSSize = maxGSSize,
        pAdjustMethod = pAdjustMethod)
      
    }, error = function(e) {
      message = enrichmentErrorMessage
      .checkAndLogWarningsAndErrors(NULL, message, isWarning = TRUE)
      NULL
    }, warning = function(w) {
      # dont print anything
      NULL
    }
    )
    
    result.list[["results"]] = .createEnrichmentTable(reactome_enrichment)
    
  }
  
  if (ontology == "DO") {
    
    packageMessage = paste0("The package DOSE is not installed, which is however needed for the chosen ontology enrichment. Please install it and re-run this function or change the ontology.")
    .checkPackageInstallation("DOSE", packageMessage)
    
    DO_enrichment = tryCatch({
      
      DOSE::enrichDO(gene          = foreground_entrez,
                     universe      = background_entrez,
                     ont           = "DO",
                     pAdjustMethod = pAdjustMethod,
                     pvalueCutoff  = 1,
                     qvalueCutoff  = 1,
                     minGSSize     = minGSSize,
                     maxGSSize     = maxGSSize)
      
    }, error = function(e) {
      message = enrichmentErrorMessage
      .checkAndLogWarningsAndErrors(NULL, message, isWarning = TRUE)
      NULL
    }, warning = function(w) {
      # dont print anything
      NULL
    }
    )
    
    result.list[["results"]] = .createEnrichmentTable(DO_enrichment)
    
  }
  
  
  # Common parameter list
  result.list[["parameters"]] = list(
    "statistic"   = statistic,
    "background"  = backgroundStr,
    "nBackground" = nBackground,
    "nForeground" = nForeground)
  
  # Non-shared parameters
  if (ontology %in% c("KEGG", "DO", "Reactome")) {
    result.list[["parameters"]]$pAdjustMethod = pAdjustMethod
  } else {
    result.list[["parameters"]]$algorithm = algorithm
  }
  
  
  return(result.list)
}


# Converts an enrichment result object into a plain result table.
#
# Arguments:
#   enrichmentObj  Either NULL or an S4 object with a slot "result" holding a data frame with 
#                  (at least) the columns Description, pvalue and Count.
#
# Returns the table from the "result" slot with the columns Description, pvalue and Count renamed to 
# Term, pval and Found. GeneRatio and BgRatio are converted to numeric if they are stored as 
# character fractions such as "5/83". For NULL input, an empty table with the expected columns is returned.
.createEnrichmentTable <- function(enrichmentObj) {
  
  # Turns fraction strings such as "5/83" into numbers without evaluating the text as R code.
  # Strings without a "/" are converted to numeric directly. Always returns a numeric vector, also for empty input.
  ratioToNumeric = function(ratios) {
    vapply(strsplit(as.character(ratios), "/", fixed = TRUE), function(parts) {
      values = as.numeric(parts)
      if (length(values) == 2) values[1] / values[2] else values[1]
    }, numeric(1))
  }
  
  if (!is.null(enrichmentObj)) {
    
    result.tbl = dplyr::rename(enrichmentObj@result, Term = "Description", pval = "pvalue", Found = "Count")
    
    # GeneRatio and BgRatio are reported as fractions like 5/83, change it to numeric here
    if (is.character(result.tbl[["GeneRatio"]])) {
        result.tbl = dplyr::mutate(result.tbl, GeneRatio = ratioToNumeric(result.tbl[["GeneRatio"]]))
    }
    
    # [[ ]] yields NULL for a missing column, so no separate existence check is needed
    if (is.character(result.tbl[["BgRatio"]])) {
        result.tbl = dplyr::mutate(result.tbl, BgRatio = ratioToNumeric(result.tbl[["BgRatio"]]))
    }
    
  } else {
    
    # Set an empty data frame so downstream aggregation functions dont stumble upon this
    result.tbl = tibble::tribble(~ID, ~Term, ~GeneRatio, ~BgRatio, ~pval, ~p.adjust, ~qvalue, ~geneID, ~Found)
  }
  
  result.tbl
  
}

# getEnrichmentResults <- function(GRN, enrichmentGroup, ontology, filePath = NULL) {
#   
#   start = Sys.time()
#   GRN = .addFunctionLogToObject(GRN)
#   
#   checkmate::assertClass(GRN, "GRN")
#   checkmate::assertSubset(enrichmentType, c("general", "byCommunity", "byTF"))
#   checkmate::assertSubset(ontology, c("GO_BP", "GO_MF", "GO_CC"))
#   
#   if (enrichmentGroup == "general") {
#     
#   }
#   if (enrichmentGroup == "byCommunity")
#   
#   bind_rows(GRN@stats$Enrichment$general[c("GO_BP")], .id = "enrichmentGroup")
#   transpose(GRN@stats$Enrichment$byCommunity)
#   
# }


#' Generate graph communities and their summarizing statistics
#' 
#' This function clusters the vertices of the TF-gene graph into communities using one of several established community detection algorithms from \code{igraph}.
#' It requires the existence of the eGRN graph in the \code{\linkS4class{GRN}} object, which in turn is based on a filtered set of connections as generated by \code{\link{filterGRNAndConnectGenes}}.
#' The results can subsequently be visualized with the function \code{\link{plotCommunitiesStats}}.
#' @template GRN
#' @param clustering Character. Default \code{louvain}. One of: \code{louvain}, \code{leiden}, \code{leading_eigen}, \code{fast_greedy}, \code{optimal}, \code{walktrap}. The community detection algorithm to be used. Please bear in mind the robustness and time consumption of the algorithms when opting for an alternative to the default. 
#' @param ... Additional parameters for the used clustering method, see the \code{igraph::cluster_*} methods for details on the specific parameters and what they do. For \code{leiden} clustering, for example, you may add a \code{resolution_parameter} to control the granularity of the community detection or \code{n_iterations} to modify the number of iterations.
#' @template forceRerun
#' @return An updated \code{\linkS4class{GRN}} object, with the result of the community detection stored in the 
#' \code{GRN@graph$TF_gene$clusterGraph} slot as well as within the \code{igraph} object in \code{GRN@graph$TF_gene$graph} (retrievable via \code{igraph} using \code{igraph::vertex.attributes(GRN@graph$TF_gene$graph)$community}, for example.)
#' @seealso \code{\link{plotCommunitiesStats}}
#' @seealso \code{\link{calculateCommunitiesEnrichment}}
#' @import patchwork
#' @examples 
#' # See the Workflow vignette on the GRaNIE website for examples
#' GRN = loadExampleObject()
#' GRN = calculateCommunitiesStats(GRN, forceRerun = FALSE)
#' @export
calculateCommunitiesStats <- function(GRN, clustering = "louvain", forceRerun = FALSE, ...) {
  
  start = Sys.time()
  checkmate::assertClass(GRN, "GRN")
  GRN = .addFunctionLogToObject(GRN)
  
  GRN = .makeObjectCompatible(GRN)
  
  checkmate::assertChoice(clustering, c("louvain", "leading_eigen", "fast_greedy", "optimal", "walktrap", "leiden"))
  checkmate::assertFlag(forceRerun)
  
  .checkGraphExistance(GRN)
  
  # Communities are only (re)calculated if the vertices carry no community attribute yet or if explicitly requested
  communitiesMissing = is.null(igraph::vertex.attributes(GRN@graph$TF_gene$graph)$community)
  
  if (communitiesMissing || forceRerun) {
    
    futile.logger::flog.info(paste0("Calculating communities for clustering type ", clustering, "..."))
    
    tfGeneGraph = GRN@graph$TF_gene$graph
    
    # Dispatch to the requested igraph algorithm. Only louvain is called with weights = NA
    communities_cluster = switch(clustering,
      "louvain"       = igraph::cluster_louvain(tfGeneGraph, weights = NA, ...),
      "leading_eigen" = igraph::cluster_leading_eigen(tfGeneGraph, ...),
      "fast_greedy"   = igraph::cluster_fast_greedy(tfGeneGraph, ...),
      "optimal"       = igraph::cluster_optimal(tfGeneGraph, ...),
      "walktrap"      = igraph::cluster_walktrap(tfGeneGraph, ...),
      # See https://www.nature.com/articles/s41598-019-41695-z for a reasoning behind leiden
      "leiden"        = igraph::cluster_leiden(tfGeneGraph, ...)
    )
    
    # TODO: How redundant is it to store this separately?
    GRN@graph$TF_gene$clusterGraph = communities_cluster 
    
    # Community sizes, largest first. The resulting order defines the factor levels of the vertex attribute
    communities_count = sort(table(communities_cluster$membership), decreasing = TRUE)
    
    # The membership vector must be in the same order as the vertices of the graph
    stopifnot(identical(igraph::vertex.attributes(GRN@graph$TF_gene$graph)$name, communities_cluster$names))
    igraph::vertex.attributes(GRN@graph$TF_gene$graph)$community = factor(communities_cluster$membership, levels = names(communities_count))
    
    # Print a summary for at most the 10 largest communities
    nClustersMax = min(length(communities_count), 10)
    scopeStr = if (length(communities_count) == nClustersMax) "all " else "largest "
    futile.logger::flog.info(paste0("Community summary for ", scopeStr, nClustersMax, " communities (Number of nodes per community, sorted by community size):"))
    
    for (labelCur in names(communities_count)[seq_len(nClustersMax)]) {
      futile.logger::flog.info(paste0(" Community ", labelCur, ": ", communities_count[[labelCur]], " nodes"))
    }
    
  } else {
      .printDataAlreadyExistsMessage()
  }
  
  .printExecutionTime(start)
  
  GRN
}


#' Run an enrichment analysis for the genes in each community in the filtered \code{\linkS4class{GRN}} object
#' 
#' The enrichment analysis is run separately for the nodes of each selected community as identified by \code{\link{calculateCommunitiesStats}}, see \code{\link{calculateTFEnrichment}} and \code{\link{calculateGeneralEnrichment}} for 
#' TF-specific and general enrichment, respectively.
#' This function requires the existence of the eGRN graph in the \code{\linkS4class{GRN}} object as produced by \code{\link{build_eGRN_graph}} as well as community information as calculated by \code{\link{calculateCommunitiesStats}}. 
#' Results can subsequently be visualized with the function \code{\link{plotCommunitiesEnrichment}}.
#' 
#' All enrichment functions use the TF-gene graph as defined in the `GRN` object. See the `ontology` argument for currently supported ontologies.
#' Also note that some parameter combinations for `algorithm` and `statistic` are incompatible, an error message will be thrown in such a case.
#' 
#' @inheritParams calculateGeneralEnrichment
#' @param selection Character. Default \code{"byRank"}. One of: \code{"byRank"}, \code{"byLabel"}. Specify whether the communities enrichment will be calculated based on their rank, where the largest community (with most vertices) would have a rank of 1, or by their label. Note that the label is independent of the rank.
#' @template communities
#' @return An updated \code{\linkS4class{GRN}} object, with the enrichment results stored in the \code{stats$Enrichment$byCommunity} slot.
#' @seealso \code{\link{plotCommunitiesEnrichment}}
#' @seealso \code{\link{plotGeneralEnrichment}}
#' @seealso \code{\link{calculateGeneralEnrichment}}
#' @seealso \code{\link{calculateCommunitiesStats}}
#' @examples 
#' # See the Workflow vignette on the GRaNIE website for examples
#' GRN = loadExampleObject()
#' GRN = calculateCommunitiesEnrichment(GRN, ontology = c("GO_BP"), forceRerun = FALSE)
#' @export
# #' @importFrom topGO whichAlgorithms whichTests
calculateCommunitiesEnrichment <- function(GRN, 
                                           ontology = c("GO_BP", "GO_MF"), algorithm = "weight01", 
                                           statistic = "fisher", 
                                           background = "neighborhood", background_geneTypes = "all",
                                           selection = "byRank", communities = NULL,
                                           pAdjustMethod = "BH",
                                           forceRerun = FALSE) {
  
  start = Sys.time()
  checkmate::assertClass(GRN, "GRN")
  GRN = .addFunctionLogToObject(GRN)
  
  GRN = .makeObjectCompatible(GRN)
  
  .checkGraphExistance(GRN)

  # Argument validation
  checkmate::assertSubset(ontology , c("GO_BP", "GO_MF", "GO_CC", "KEGG", "DO", "Reactome"), empty.ok = FALSE)
 
  .checkPackage_topGO_and_arguments(ontology, algorithm, statistic)
  
  checkmate::assertChoice(background, c("all_annotated", "all_RNA", "all_RNA_filtered", "neighborhood"))
  checkmate::assertSubset(background_geneTypes, c("all", unique(as.character(GRN@annotation$genes$gene.type))) %>% stats::na.omit(), empty.ok = FALSE)
  checkmate::assertChoice(selection, c("byRank", "byLabel"))
  checkmate::assertChoice(pAdjustMethod, c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none"))
  checkmate::assertFlag(forceRerun)
  
  
  # Translate the user selection into community labels
  .checkCommunityExistance(GRN)
  communityLabels = .selectCommunities(GRN, selection, communities, refAll = "graph", graph = "TF_gene")
  
  if (!is.null(communities)) {
      
      selectionStr = if (selection == "byRank") "rank (1 = largest, 2 = second largest, ...)" else "label"
      futile.logger::flog.info(paste0("Selected communities based on ", selectionStr, ": ", paste0(communities, collapse = ","), ". This corresponds to the following community labels: ", paste0(communityLabels, collapse = ",")))

      if (length(communityLabels) == 0) {
          message = paste("No communities (left) to run enrichment for. Adjust the settings accordingly.")
          .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
      }
    
  }
  # TODO
  
  scopeStr = if (is.null(communities)) "all " else "selected "
  futile.logger::flog.info(paste0("Running enrichment analysis for ", scopeStr, length(communityLabels), " communities. This may take a while..."))
  
  
  # Both are identical for all communities and ontologies and are therefore determined only once
  mapping = .getGenomeObject(GRN@config$parameters$genomeAssembly, type = "packageName")
  backgroundGenes = .getBackgroundGenes(GRN, type = background, gene.types = background_geneTypes)
  
  if (is.null(GRN@stats[["Enrichment"]][["byCommunity"]])) {
    GRN@stats[["Enrichment"]][["byCommunity"]] = list()
  }
  
  
  for (communityCur in communityLabels) {
    
    futile.logger::flog.info(paste0(" Community ", communityCur))
    
    if (is.null(GRN@stats[["Enrichment"]][["byCommunity"]][[communityCur]])) {
      GRN@stats[["Enrichment"]][["byCommunity"]][[communityCur]] = list()
    }
    
    # Foreground: names of all vertices that belong to the current community
    vertices.df = as.data.frame(igraph::vertex.attributes(GRN@graph$TF_gene$graph))
    verticesCur.df = dplyr::filter(vertices.df, .data$community == communityCur)
    foregroundCur = dplyr::pull(verticesCur.df, .data$name)
    
    for (ontologyCur in ontology) {
      
      # Skip ontologies for which results already exist unless a rerun is enforced
      resultExists = !is.null(GRN@stats[["Enrichment"]][["byCommunity"]][[communityCur]][[ontologyCur]])
      
      if (resultExists && !forceRerun) {
          .printDataAlreadyExistsMessage()
          next
      }
      
      GRN@stats[["Enrichment"]][["byCommunity"]][[communityCur]][[ontologyCur]] = 
        .runEnrichment(GRN,
                       foreground = foregroundCur, 
                       background = backgroundGenes, 
                       backgroundStr = background,
                       ontology = ontologyCur, 
                       algorithm = algorithm, 
                       statistic = statistic,
                       mapping = mapping,
                       pAdjustMethod =  pAdjustMethod)
      
      futile.logger::flog.info(paste0("Result stored in GRN@stats$Enrichment$byCommunity[[\"", communityCur,  "\"]]$", ontologyCur, "$results"))
      
    }
  }
  
  .printExecutionTime(start)
  
  GRN
}

# Verifies that community information is available for the TF-gene graph.
# Two places are checked: the vertex attribute "community" of GRN@graph$TF_gene$graph 
# and the element GRN@graph$TF_gene$clusterGraph. If either one is NULL, the problem is
# reported via .checkAndLogWarningsAndErrors with isWarning = FALSE.
.checkCommunityExistance <- function(GRN) {
    
    # The same message is used for both checks
    missingInfoMessage = paste("Could not find community information in the graph object. Run the function calculateCommunitiesStats")
    
    vertexCommunities = igraph::vertex.attributes(GRN@graph$TF_gene$graph)$community
    if (is.null(vertexCommunities)) {
        .checkAndLogWarningsAndErrors(NULL, missingInfoMessage, isWarning = FALSE)
    }
    
    if (is.null(GRN@graph$TF_gene$clusterGraph)) {
        .checkAndLogWarningsAndErrors(NULL, missingInfoMessage, isWarning = FALSE)
    }
}

# Translates a user selection of communities into a character vector of community labels.
#
# Arguments:
#   GRN          Object from which the graph (refAll = "graph") or the names of the 
#                community enrichment results (refAll = "enrichment") are read.
#   display      "byRank" or "byLabel": how `communities` is to be interpreted. Only used if `communities` is not NULL.
#   communities  NULL (take all known communities), a numeric vector of ranks (display = "byRank") 
#                or a character vector of community labels (display = "byLabel").
#   refAll       "graph" or "enrichment": the source that defines the set of all known communities.
#   graph        Name of the graph in GRN@graph to use. Default "TF_gene".
#
# Returns a character vector of community labels. Requested labels that are not known are dropped and 
# reported via .checkAndLogWarningsAndErrors with isWarning = TRUE.
.selectCommunities <- function(GRN, display, communities, refAll, graph = "TF_gene") {
    
    checkmate::assertChoice(refAll, c("graph", "enrichment"))
    
    if (refAll == "graph") {
        allCalculatedCommunities = 
            igraph::vertex.attributes(GRN@graph[[graph]]$graph) %>%
            as.data.frame() %>%
            dplyr::pull("community") %>%
            unique() %>% 
            as.character()
        
    } else {
        # "combined" is stored alongside the individual communities but is not a community itself
        allCalculatedCommunities = setdiff(names(GRN@stats$Enrichment$byCommunity), "combined")
        
    }
    
    # Without an explicit selection, all known communities are taken
    if (is.null(communities)) {
        return(allCalculatedCommunities)
    }
    
    # An unsupported value would otherwise leave the result undefined
    checkmate::assertChoice(display, c("byRank", "byLabel"))
    
    if (display == "byRank") {
        # Only display communities we have data for, in a reasonable order
        checkmate::assertNumeric(communities, lower = 1, any.missing = FALSE, min.len = 1)
        # Forward the graph so that the ranks are determined on the same graph as used above
        communitiesDisplay = .selectCommunitesByRank(GRN, communities, graph = graph)
        
    } else { # byLabel
        
        checkmate::assertCharacter(communities, min.chars = 1, any.missing = FALSE, min.len = 1)
        
        communitiesDisplay = as.character(communities)
        # issue a warning if the community label does not exist
        diff.communities = setdiff(communitiesDisplay, allCalculatedCommunities)
        if (length(diff.communities) > 0) {
            message = paste("calculateCommunitiesEnrichment: The following communities do not exist and will not be in the analysis: ", paste0(diff.communities, collapse = " + "))
            .checkAndLogWarningsAndErrors(NULL, message, isWarning = TRUE)
            communitiesDisplay = setdiff(communitiesDisplay, diff.communities)
        }
        
    }
    
    communitiesDisplay
}



# Returns the labels of the communities with the requested size ranks (1 = largest community).
#
# Arguments:
#   GRN          Object whose graph GRN@graph[[graph]]$graph carries the vertex attribute "community".
#   communities  Numeric vector of ranks or NULL, in which case all communities are taken.
#   graph        Name of the graph in GRN@graph to use. Default "TF_gene".
#
# Returns a character vector of community labels, ordered as requested. Ranks that exceed the number of 
# communities are ignored and reported via .checkAndLogWarningsAndErrors (isWarning = TRUE). 
# If no community is left at all, this is reported with isWarning = FALSE.
.selectCommunitesByRank <- function(GRN, communities, graph = "TF_gene") {
  
  # Number of vertices per community
  df = igraph::vertex.attributes(GRN@graph[[graph]]$graph) %>%
    as.data.frame() %>% 
    dplyr::count(.data$community)
  
  nCommunities = nrow(df)
  
  if (is.null(communities)) {
    communities = seq_len(nCommunities)
  }
  
  # slice silently drops positions beyond the number of rows
  selCommunities = df %>% 
    dplyr::arrange(dplyr::desc(.data$n)) %>%
    dplyr::slice(communities) %>%
    dplyr::pull(.data$community) %>%
    as.character()
  
  if (length(selCommunities) == 0) {
      existingCommunities = unique(df$community)
      message = paste0("None of the requested communities (", paste0(communities, collapse = ","), ") were found. Only the following communities are available: ", paste0(existingCommunities, collapse = ","))
      .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
  }
  
  if (length(selCommunities) < length(communities)) {
      # Missing are the requested ranks without a corresponding community. The ranks must not be 
      # compared with the selected labels here because labels are independent of ranks
      missingCommunities = setdiff(communities, seq_len(nCommunities))
      message = paste0("calculateCommunitiesEnrichment: Some of the requested communities (", paste0(missingCommunities , collapse = ","), ") were not found and have been ignored. Only the following communities are available and have been taken: ", paste0(selCommunities, collapse = ","))
      .checkAndLogWarningsAndErrors(NULL, message, isWarning = TRUE)
  }
  
  selCommunities
  
}

#' Retrieve the top nodes (TFs or genes) with respect to either degree or Eigenvector centrality in the filtered \code{\linkS4class{GRN}} object.
#' 
#' This function requires a filtered set of connections in the \code{\linkS4class{GRN}} object as generated by \code{\link{filterGRNAndConnectGenes}}.
#' \strong{Note: This function, as all \code{get} functions from this package, does NOT return a \code{\linkS4class{GRN}} object.}
#' 
#' @template GRN
#' @param nodeType Character. One of: \code{"gene"} or \code{"TF"}. Node type.
#' @param rankType Character. One of: \code{"degree"}, \code{"EV"}. This parameter will determine the criterion to be used to identify the "top" nodes. If set to "degree", the function will select top nodes based on the number of connections they have, i.e. based on their degree-centrality. If set to "EV" it will select the top nodes based on their eigenvector-centrality score in the network.
#' @param n Numeric. Default 0.1. A single value. If this parameter is passed as a value between 0 and 1, it is treated as the fraction of top nodes relative to the number of distinct nodes of the chosen type. If the value is passed as an integer >=1 it will be treated as the number of top nodes.
#' @param use_TF_gene_network \code{TRUE} or \code{FALSE}. Default \code{TRUE}. Should the TF-gene network be used (\code{TRUE}) or the TF-peak-gene network (\code{FALSE})?
#' @return A data frame with the node names and the corresponding scores used to rank them
#' @examples 
#' # See the Workflow vignette on the GRaNIE website for examples
#' GRN = loadExampleObject()
#' topGenes = getTopNodes(GRN, nodeType = "gene", rankType = "degree", n = 3)
#' topTFs = getTopNodes(GRN, nodeType = "TF", rankType = "EV", n = 5)
#' @export
getTopNodes <- function(GRN, nodeType, rankType, n = 0.1, use_TF_gene_network = TRUE) { 
  
  start = Sys.time()
  checkmate::assertClass(GRN, "GRN")
  GRN = .addFunctionLogToObject(GRN)
  
  GRN = .makeObjectCompatible(GRN)
  
  checkmate::assertChoice(nodeType, c("gene", "TF"))
  checkmate::assertChoice(rankType, c("degree", "EV"))
  checkmate::assertFlag(use_TF_gene_network)

  # n must be a single non-missing value because it is used in a scalar condition below
  checkmate::assert(checkmate::checkNumeric(n, lower = 0.0001, upper = 0.999999, any.missing = FALSE, len = 1), 
                    checkmate::checkIntegerish(n, lower = 1, any.missing = FALSE, len = 1))
  
  .checkGraphExistance(GRN)
  
  # slot: column of the filtered connections used to count the distinct nodes
  # link: connection type to filter the graph table for
  if (nodeType == "gene") {
    slot = "gene.ENSEMBL"
    link = if (use_TF_gene_network) "tf-gene" else "peak-gene"
  } else {
    slot = "TF.ENSEMBL"
    link = if (use_TF_gene_network) "tf-gene" else "tf-peak"
  } 
  
  graphType = if (use_TF_gene_network) "TF_gene" else "TF_peak_gene"
  
  
  if (n < 1) {
    # Get the total number of distinct nodes and calculate a percentage of that irrespective of node degree
    nNodesDistinct = GRN@connections$all.filtered$`0`[[slot]] %>% 
      unique() %>% 
      stats::na.omit() %>% 
      length()
    top.n = round(nNodesDistinct * n)
  } else {
    top.n = n
  }
  
  futile.logger::flog.info(paste0("n = ", n, " equals finding the top ", top.n, " ", rankType , "-central ", nodeType, "s in the network"))
  
  graph.df = GRN@graph[[graphType]]$table
  
  if (rankType == "degree") {
    
    # Genes are counted via the column V2 of the graph table, TFs via the column V1
    col = if (nodeType == "gene") "V2" else "V1"
    topNodes = graph.df %>%
      dplyr::filter(.data$connectionType == link) %>%
      dplyr::count(!!as.name(col), sort = TRUE) %>%
      dplyr::rename(Connections = "n") %>%
      dplyr::arrange(dplyr::desc(.data$Connections)) %>%
      dplyr::slice(seq_len(top.n)) 
    
    # Add the node names and switch to more meaningful column names
    # TODO: change column names
    if (nodeType == "gene") {
      topNodes = topNodes  %>%
        dplyr::left_join(graph.df %>% dplyr::select("V2", "V2_name") %>% dplyr::distinct(), by = "V2") %>%
        dplyr::rename(gene.ENSEMBL = "V2", gene.name = "V2_name")
    } else {
      topNodes = topNodes  %>%
        dplyr::left_join(graph.df %>% dplyr::select("V1", "V1_name") %>% dplyr::distinct(), by = "V1") %>%
        dplyr::rename(TF.ENSEMBL = "V1", TF.ID = "V1_name")
    }
    
  } else { # if EV
    
    slot2 = if (nodeType == "gene") "topGenes" else "topTFs"
    topNodes = .getEigenCentralVertices(GRN, graphType = graphType, nCentralGenes = top.n, nCentralTFs = top.n)[[slot2]][["data"]]
  }
  
  # Remove unnecessary extra column
  if ("name_plot" %in% colnames(topNodes)) {
      topNodes = dplyr::select(topNodes, -"name_plot")
  }

  .printExecutionTime(start)
  return(topNodes)
}


#' Run an enrichment analysis for the set of genes connected to a particular TF or sets of TFs in the filtered \code{\linkS4class{GRN}} object
#' 
#' The enrichment analysis is based on the subset of the network connected to particular TFs (TF regulons), see \code{\link{calculateCommunitiesEnrichment}} and \code{\link{calculateGeneralEnrichment}} for 
#' community- and general enrichment, respectively.
#' This function requires the existence of the eGRN graph in the \code{\linkS4class{GRN}} object as produced by \code{\link{build_eGRN_graph}}. 
#' Results can subsequently be visualized with the function \code{\link{plotTFEnrichment}}.
#' 
#' All enrichment functions use the TF-gene graph as defined in the `GRN` object. See the `ontology` argument for currently supported ontologies.
#' Also note that some parameter combinations for `algorithm` and `statistic` are incompatible; an error message will be thrown in such a case.
#' 
#' Results that already exist in the object for a particular TF and ontology are not recalculated unless \code{forceRerun = TRUE}.
#'  
#' @inheritParams calculateGeneralEnrichment
#' @param rankType Character. Default \code{"degree"}. One of: \code{"degree"}, \code{"EV"}, \code{"custom"}. This parameter will determine the criterion to be used to identify the "top" TFs. 
#' If set to \code{"degree"}, the function will select top TFs based on the number of connections to genes they have, i.e. based on their degree-centrality. 
#' If set to \code{"EV"} it will select the top TFs based on their eigenvector-centrality score in the network. 
#' If set to \code{"custom"}, a set of TF IDs will have to be passed to the \code{TF.IDs} parameter.
#' @param n Numeric. Default 3. If this parameter is passed as a value between 0 and 1, it is treated as a percentage of top nodes. If the value is passed as an integer it will be treated as the number of top nodes. This parameter is not relevant if \code{rankType = "custom"}.
#' @param TF.IDs Character vector. Default \code{NULL}. If the rank type is set to \code{"custom"}, a vector of TF IDs for which the GO enrichment should be calculated should be passed to this parameter. 
#' An error is raised if \code{rankType = "custom"} and no TF IDs are provided, or if any of the provided IDs is not part of the filtered connections (column \code{TF.ID}).
#' @return An updated \code{\linkS4class{GRN}} object, with the enrichment results stored in the \code{stats$Enrichment$byTF} slot.
#' @seealso \code{\link{plotTFEnrichment}}
#' @examples 
#' # See the Workflow vignette on the GRaNIE website for examples
#' GRN =  loadExampleObject()
#' GRN =  calculateTFEnrichment(GRN, n = 5, ontology = "GO_BP", forceRerun = FALSE)
#' @export
# #' @importFrom topGO whichAlgorithms whichTests
calculateTFEnrichment <- function(GRN, rankType = "degree", n = 3, TF.IDs = NULL,
                                  ontology = c("GO_BP", "GO_MF"), algorithm = "weight01", 
                                  statistic = "fisher", 
                                  background = "neighborhood", background_geneTypes = "all",
                                  pAdjustMethod = "BH",
                                  forceRerun = FALSE) {
  
  start = Sys.time()
  checkmate::assertClass(GRN, "GRN")
  GRN = .addFunctionLogToObject(GRN)
  
  GRN = .makeObjectCompatible(GRN)
  
  # Argument validation
  checkmate::assertChoice(rankType, c("degree", "EV", "custom"))
  # n is either a fraction in (0, 1) or an integer count >= 1
  checkmate::assert(checkmate::checkNumeric(n, lower = 0.0001, upper = 0.999999), checkmate::checkIntegerish(n, lower = 1))
  checkmate::assertSubset(ontology , c("GO_BP", "GO_MF", "GO_CC", "KEGG", "Reactome", "DO"), empty.ok = FALSE)
  
  .checkPackage_topGO_and_arguments(ontology, algorithm, statistic)

  checkmate::assertChoice(background, c("all_annotated", "all_RNA", "all_RNA_filtered", "neighborhood"))
  checkmate::assertSubset(background_geneTypes, c("all", unique(as.character(GRN@annotation$genes$gene.type))) %>% stats::na.omit(), empty.ok = FALSE)
  checkmate::assertChoice(pAdjustMethod, c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none"))
  checkmate::assertFlag(forceRerun)
  
  futile.logger::flog.info("Calculating TF enrichment. This may take a while")
  
  .checkGraphExistance(GRN)
  
  # Filtered TF-peak-gene connections; used both for validating custom TF IDs and for collecting the genes per TF
  connections.df = GRN@connections$all.filtered$`0`
  
  if (rankType == "custom") {
    
    # Fail immediately with a specific message. Merely logging here would let execution fall through 
    # to the unrelated "No TF fulfills the chosen criteria" error further below.
    if (length(TF.IDs) == 0) {
      msg = "To calculate the GO enrichment for a custom set of TFs, you must provide the TF IDs in the 'TF.IDs' parameter."
      .checkAndLogWarningsAndErrors(NULL, msg, isWarning = FALSE)
    }
      
    if (!all(TF.IDs %in% connections.df$TF.ID)) {
        wrongTFs = setdiff(TF.IDs, unique(connections.df$TF.ID))
        msg = paste0("All TF IDs that are provided when using a custom rankType must be contained in the GRN@connections$all.filtered$`0` slot (column TF.ID). However, at least one is not: ", paste0(wrongTFs, collapse = ", "))
        .checkAndLogWarningsAndErrors(NULL, msg, isWarning = FALSE)
    }
    
    TFset = TF.IDs
    
  } else {
    
    # TF.ID is always there, irrespective of whether ENSEMBL ID or TF name is used as primary ID type
    TFset = getTopNodes(GRN, nodeType = "TF", rankType = rankType, n, use_TF_gene_network = TRUE) %>% 
      dplyr::pull("TF.ID")
  }
  
  # TODO: Continue working on the TF.ID level or switch to Ensembl? Should be in concordance with the graph!
  
  mapping = .getGenomeObject(GRN@config$parameters$genomeAssembly, type = "packageName")
  
  if (is.null(GRN@stats[["Enrichment"]][["byTF"]])) {
    GRN@stats[["Enrichment"]][["byTF"]] = list()
  }
  
  if (length(TFset) > 0) {
    futile.logger::flog.info(paste0("Running enrichment analysis for the following TFs: ", paste0(TFset, collapse = ", ")))
  } else {
    msg = "No TF fulfills the chosen criteria. Try increasing the value of the parameter n"
    .checkAndLogWarningsAndErrors(NULL, msg, isWarning = FALSE)
  }
  
  # The background does not depend on the individual TF, so it is determined only once for all TFs
  backgroundGenes = .getBackgroundGenes(GRN, type = background, gene.types = background_geneTypes)
  
  for (TF in as.character(TFset)) {
    
    futile.logger::flog.info(paste0(" Running enrichment analysis for genes connected to the TF ", TF))
    
    # Foreground: the unique genes connected to the current TF in the filtered connections
    curGenes = connections.df %>% 
      dplyr::filter(.data$TF.ID == TF) %>% 
      dplyr::pull("gene.ENSEMBL") %>%
      unique()
    
    for (ontologyCur in ontology) {
      
      futile.logger::flog.info(paste0("  Ontology ", ontologyCur))
      
      # Only (re)calculate if no result is stored yet for this TF/ontology combination or if explicitly requested
      if (is.null(GRN@stats[["Enrichment"]][["byTF"]][[TF]][[ontologyCur]]) || forceRerun) {
        
        if (is.null(GRN@stats[["Enrichment"]][["byTF"]][[TF]])) {
          GRN@stats[["Enrichment"]][["byTF"]][[TF]] = list()
        }
        
        GRN@stats[["Enrichment"]][["byTF"]][[TF]][[ontologyCur]] =  
          .runEnrichment(GRN,
                         foreground = curGenes,
                         background = backgroundGenes, 
                         backgroundStr = background,
                         ontology = ontologyCur, 
                         algorithm = algorithm, 
                         statistic = statistic,
                         mapping = mapping,
                         pAdjustMethod =  pAdjustMethod)
        
        futile.logger::flog.info(paste0("   Results stored in GRN@stats$Enrichment$byTF[[\"", TF, "\"]]$", ontologyCur, "$results"))
        
      } else {
          .printDataAlreadyExistsMessage(slotName = paste0("Enrichment$byTF$", TF, "$", ontologyCur))
      }
    }
    
  }
  
  .printExecutionTime(start)
  GRN
}


# Internal helper: verifies that both the TF-peak-gene and the TF-gene graph are present in the 
# graph slot of the GRN object. If either one is missing, an error is raised via 
# .checkAndLogWarningsAndErrors that asks the user to (re)run build_eGRN_graph.
.checkGraphExistance <- function(GRN) {
    
    # Scalar condition: use the short-circuit operator rather than the elementwise one
    graphMissing = is.null(GRN@graph$TF_peak_gene) || is.null(GRN@graph$TF_gene)
    
    if (graphMissing) {
        msg = "Could not find graph slot in the object. (Re)run the function build_eGRN_graph"
        .checkAndLogWarningsAndErrors(NULL, msg, isWarning = FALSE)
    }
    
}
