% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/VariableAssociation.R
\name{VariableAssociation}
\alias{VariableAssociation}
\title{Variable Association Analysis}
\usage{
VariableAssociation(
  method = c("ssGSEA", "logmedian", "ranking", "GSEA"),
  data,
  metadata,
  cols,
  gene_set,
  mode = c("simple", "medium", "extensive"),
  stat = NULL,
  ignore_NAs = FALSE,
  signif_color = "red",
  nonsignif_color = "grey",
  sig_threshold = 0.05,
  saturation_value = NULL,
  widthlabels = 18,
  labsize = 10,
  titlesize = 14,
  pointSize = 5,
  discrete_colors = NULL,
  continuous_color = "#8C6D03",
  color_palette = "Set2",
  printplt = TRUE
)
}
\arguments{
\item{method}{Character string specifying the method to use. One of:
\itemize{
\item \code{"logmedian"}
\item \code{"ssGSEA"}
\item \code{"ranking"}
\item \code{"GSEA"}
}}

\item{data}{A data frame with gene expression data (genes as rows,
samples as columns).}

\item{metadata}{A data frame containing sample metadata; the first column
should be the sampleID.}

\item{cols}{Character vector of metadata column names to analyze.}

\item{gene_set}{A named list of gene sets:
\itemize{
\item For score-based methods: list of gene vectors.
\item For GSEA: list of vectors (unidirectional) or
data frames (bidirectional).
}}

\item{mode}{Contrast mode: \code{"simple"} (default), \code{"medium"}, or \code{"extensive"}.}

\item{stat}{(GSEA only) Optional. Statistic for ranking genes
(\code{"B"} or \code{"t"}). Auto-detected if \code{NULL}.}

\item{ignore_NAs}{(GSEA only) Logical. If \code{TRUE}, rows with NA metadata are
removed. Default: \code{FALSE}.}

\item{signif_color}{Color used for significant associations
(default: \code{"red"}).}

\item{nonsignif_color}{Color used for non-significant associations
(default: \code{"grey"}).}

\item{sig_threshold}{Numeric significance cutoff (default: \code{0.05}).}

\item{saturation_value}{Lower limit for p-value coloring (default: \code{NULL},
in which case it is determined automatically).}

\item{widthlabels}{Integer giving the width at which contrast labels are
wrapped (default: \code{18}).}

\item{labsize}{Axis text size (default: \code{10}).}

\item{titlesize}{Plot title size (default: \code{14}).}

\item{pointSize}{Size of plot points (default: \code{5}).}

\item{discrete_colors}{(Score-based only) Optional named list mapping
factor levels to colors.}

\item{continuous_color}{(Score-based only) Color for continuous variable
points (default: \code{"#8C6D03"}).}

\item{color_palette}{(Score-based only) ColorBrewer palette name for
categorical variables (default: \code{"Set2"}).}

\item{printplt}{Logical. If \code{TRUE}, plots are printed. Default: \code{TRUE}.}
}
\value{
A list with method-specific results and ggplot2-based visualizations:

\strong{For score-based methods (\code{logmedian}, \code{ssGSEA}, \code{ranking}):}
\itemize{
\item \code{Overall}: Data frame of effect sizes (Cohen's f) and p-values for each
metadata variable.
\item \code{Contrasts}: Data frame of Cohen's d values and adjusted p-values for
pairwise comparisons (based on \code{mode}).
\item \code{plot}: A combined visualization including:
\itemize{
\item Lollipop plots of Cohen's f,
\item Distribution plots by variable (density or scatter),
\item Lollipop plots of Cohen's d for contrasts.
}
\item \code{plot_contrasts}: Lollipop plots of Cohen's d effect sizes, colored by
adjusted p-values (BH).
\item \code{plot_overall}: Lollipop plot of Cohen's f, colored by p-values.
\item \code{plot_distributions}: List of distribution plots of scores by variable.
}

\strong{For the GSEA-based method (\code{GSEA}):}
\itemize{
\item \code{data}: A data frame with GSEA results, including normalized enrichment
scores (NES), adjusted p-values, and contrasts.
\item \code{plot}: A ggplot2 lollipop plot of GSEA enrichment across contrasts.
}
}
\description{
This unified function evaluates associations between gene expression and
sample metadata using one of several methods: score-based (logmedian,
ssGSEA, ranking) or GSEA-based association.
The function returns statistical results and visualizations summarizing
effect sizes and significance.
}
\examples{
# Simulate gene expression data (genes as rows, samples as columns)
set.seed(42)
expr <- as.data.frame(matrix(rnorm(500), nrow = 50, ncol = 10))
rownames(expr) <- paste0("Gene", 1:50)
colnames(expr) <- paste0("Sample", 1:10)

# Simulate metadata with one categorical variable (Group) and one
# continuous variable (Age); the first column holds the sample IDs
metadata <- data.frame(
  sampleID = paste0("Sample", 1:10),
  Group = rep(c("A", "B"), each = 5),
  Age = sample(20:60, 10),
  row.names = colnames(expr)
)

# Define a toy gene set (use a single gene set only in discovery mode)
gene_set <- list(
  Signature1 = paste0("Gene", 1:10)
)

# Score-based association (e.g., logmedian)
res_score <- VariableAssociation(
  method = "logmedian",
  data = expr,
  metadata = metadata,
  cols = c("Group", "Age"),
  gene_set = gene_set
)
# Overall effect sizes and p-values per metadata variable
print(res_score$Overall)
# Combined visualization stored in the result
print(res_score$plot)

# GSEA-based association (if GSEA_VariableAssociation is available)
# res_gsea <- VariableAssociation(
#   method = "GSEA",
#   data = expr,
#   metadata = metadata,
#   cols = "Group",
#   gene_set = gene_set
# )
# print(res_gsea$data)
# print(res_gsea$plot)


}
