% Generated by roxygen2: do not edit by hand
% Please edit documentation in
%   R/imbalance_calculate_list_average_scaling_factors.R
\name{imbalance_calculate_list_average_scaling_factors}
\alias{imbalance_calculate_list_average_scaling_factors}
\title{Calculate Per-Sample Scaling Factors for Multiple Spike-in Groups}
\usage{
imbalance_calculate_list_average_scaling_factors(
  obj,
  spiked_species_list,
  spiked_cells_list,
  merge_method = c("sum", "max"),
  normalize = TRUE,
  allow_infinite = FALSE,
  verbose = FALSE
)
}
\arguments{
\item{obj}{A \code{phyloseq::phyloseq} or
\code{TreeSummarizedExperiment::TreeSummarizedExperiment} object.}

\item{spiked_species_list}{A named list of character vectors giving the
spike-in species names, as they appear in the \code{Species} column of
\code{tax_table()} or \code{rowData()}.}

\item{spiked_cells_list}{A named list (same length as \code{spiked_species_list});
each element is either a single number or a named numeric vector giving the
expected spike-in cells per sample.}

\item{merge_method}{\code{"sum"} (default) or \code{"max"}. Defines how OTUs within a
spike-in group are merged.}

\item{normalize}{Logical; if \code{TRUE}, scaling factors are normalized
so that their median equals 1. Default = \code{TRUE}.}

\item{allow_infinite}{Logical; if \code{TRUE}, zero spike reads return \code{Inf}
instead of \code{NA}. Default = \code{FALSE}.}

\item{verbose}{Logical; if \code{TRUE}, prints per-group summaries.
Default = \code{FALSE}.}
}
\value{
Named numeric vector of scaling factors (one per sample).
}
\description{
Computes per-sample scaling factors for multiple spike-in taxa (e.g.,
\emph{Bacillus_spike}, \emph{Flavobacterium_spike}) in either a \code{phyloseq} or
\code{TreeSummarizedExperiment} object. Handles variable spike-in cell counts per
sample and supports \code{"sum"} or \code{"max"} OTU merging methods.
}
\details{
Scaling factors are computed as:
\deqn{\mathrm{ScalingFactor} = \frac{\mathrm{ExpectedSpikeCells}}{\mathrm{ObservedSpikeReads}}}{ScalingFactor = ExpectedSpikeCells / ObservedSpikeReads}

For each spike-in group:
\enumerate{
\item Identify OTUs matching that spike species via the \code{Species} column.
\item Merge those OTUs per sample (\code{sum} or \code{max}).
\item Divide expected spike cells by observed reads.
\item Average across all spike-in groups to produce one factor per sample.
}

Uses full matrix preallocation (no incremental vector growth) for Bioconductor
compliance. Samples with zero spike reads in a group yield \code{NA} for that
group, or \code{Inf} when \code{allow_infinite = TRUE}. Samples for which every
spike-in group is \code{NA} receive a scaling factor of 1.
}
\examples{
if (requireNamespace("phyloseq", quietly = TRUE)) {
  library(phyloseq)

  ## Toy community: four resident OTUs plus two spike-in taxa, four samples
  sample_ids <- c("S1", "S2", "S3", "S4")
  taxa_ids <- c("OTU1", "OTU2", "OTU3", "OTU4",
                "Flavobacterium_spike", "Bacillus_spike")

  ## One row of read counts per taxon, one column per sample
  otu <- rbind(
    c(6000, 6200, 5900, 6100),
    c(4000, 4200, 3900, 4100),
    c(2000, 1900, 2100, 2050),
    c(1300, 1250, 1350, 1400),
    c( 500,  800,  900,  700),   # Flavobacterium_spike
    c( 900, 1200, 1100, 1000)    # Bacillus_spike
  )
  dimnames(otu) <- list(taxa_ids, sample_ids)

  ## Taxonomy: the Species column is what the spike-in names are matched on
  tax <- data.frame(
    Kingdom = rep("Bacteria", length(taxa_ids)),
    Species = taxa_ids,
    row.names = taxa_ids
  )

  ## Sample metadata carries the sample IDs as a column so that
  ## sample_data is valid
  sam <- data.frame(SampleID = sample_ids, row.names = sample_ids)

  ps <- phyloseq(
    otu_table(otu, taxa_are_rows = TRUE),
    tax_table(as.matrix(tax)),
    sample_data(sam)
  )

  ## Spike-in groups and the expected number of spiked cells in each sample
  spiked_species_list <- list(
    Flavo = "Flavobacterium_spike",
    Bacillus = "Bacillus_spike"
  )

  spiked_cells_list <- list(
    Flavo = c(S1 = 1e7, S2 = 3e7, S3 = 6e7, S4 = 2e7),
    Bacillus = c(S1 = 2e7, S2 = 1e7, S3 = 5e7, S4 = 3e7)
  )

  ## The same call works on a phyloseq object ...
  factors_phy <- imbalance_calculate_list_average_scaling_factors(
    obj = ps,
    spiked_species_list = spiked_species_list,
    spiked_cells_list = spiked_cells_list,
    normalize = FALSE
  )

  ## ... and on its TreeSummarizedExperiment counterpart
  tse <- convert_phyloseq_to_tse(ps)
  factors_tse <- imbalance_calculate_list_average_scaling_factors(
    obj = tse,
    spiked_species_list = spiked_species_list,
    spiked_cells_list = spiked_cells_list,
    normalize = FALSE
  )

  ## Compare the scaling factors obtained from the two representations
  all.equal(factors_phy, factors_tse)
}

}
