% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/simulate_data.R
\name{simulate_data}
\alias{simulate_data}
\title{simulate_data}
\usage{
simulate_data(
  .data,
  .estimate_object,
  formula_composition,
  formula_variability = NULL,
  .sample = NULL,
  .cell_group = NULL,
  .coefficients = NULL,
  variability_multiplier = 5,
  number_of_draws = 1,
  mcmc_seed = sample_seed(),
  cores = detectCores(),
  sig_figs = 9,
  cache_stan_model = sccomp_stan_models_cache_dir
)
}
\arguments{
\item{.data}{A tibble including a cell_group name column, a sample name column, a read counts column, factor columns, a Pvalue column, and a significance column.}

\item{.estimate_object}{The result of sccomp_estimate execution. This is used for sampling from real-data properties.}

\item{formula_composition}{A formula. The formula describing the model for differential abundance, for example ~treatment}

\item{formula_variability}{A formula. The formula describing the model for differential variability, for example ~treatment}

\item{.sample}{A column name as symbol. The sample identifier}

\item{.cell_group}{A column name as symbol. The cell_group identifier}

\item{.coefficients}{The column names for coefficients, for example, c(b_0, b_1)}

\item{variability_multiplier}{A real scalar. This can be used for artificially increasing the variability of the simulation for benchmarking purposes.}

\item{number_of_draws}{An integer. How many copies of the data you want to draw from the model joint posterior distribution.}

\item{mcmc_seed}{An integer. Used for Markov-chain Monte Carlo reproducibility. By default a random number is sampled from 1 to 999999. This itself can be controlled by \code{set.seed()}.}

\item{cores}{Integer, the number of cores to be used for parallel calculations.}

\item{sig_figs}{Number of significant figures to use for Stan model output. Default is 9.}

\item{cache_stan_model}{A character string specifying the cache directory for compiled Stan models.
The sccomp version will be automatically appended to ensure version isolation.
Default is \code{sccomp_stan_models_cache_dir} which points to \verb{~/.sccomp_models}.}
}
\value{
A tibble (\code{tbl}) with the following columns:
\itemize{
\item \strong{sample} - A character column representing the sample name.
\item \strong{type} - A factor column representing the type of the sample.
\item \strong{phenotype} - A factor column representing the phenotype in the data.
\item \strong{count} - An integer column representing the original cell counts.
\item \strong{cell_group} - A character column representing the cell group identifier.
\item \strong{b_0} - A numeric column representing the first coefficient used for simulation.
\item \strong{b_1} - A numeric column representing the second coefficient used for simulation.
\item \strong{generated_proportions} - A numeric column representing the generated proportions from the simulation.
\item \strong{generated_counts} - An integer column representing the generated cell counts from the simulation.
\item \strong{replicate} - An integer column representing the replicate number for each draw from the posterior distribution.
}
}
\description{
This function simulates data from a fitted model.
}
\examples{

print("cmdstanr is needed to run this example.")
# Note: Before running the example, ensure that the 'cmdstanr' package is installed:
# install.packages("cmdstanr", repos = c("https://stan-dev.r-universe.dev/", getOption("repos")))

\donttest{
  if (instantiate::stan_cmdstan_exists()) {
    data("counts_obj")
    library(dplyr)

    estimate = sccomp_estimate(
      counts_obj,
      ~ type, ~1, "sample", "cell_group", "count",
      cores = 1
    )

    # Set coefficients for cell_groups. In this case all coefficients are 0 for simplicity.
    counts_obj = counts_obj |> mutate(b_0 = 0, b_1 = 0)

    # Simulate data
    simulate_data(counts_obj, estimate, ~type, ~1, sample, cell_group, c(b_0, b_1))
  }
}

}
\references{
S. Mangiola, A.J. Roth-Schulze, M. Trussart, E. Zozaya-Valdés, M. Ma, Z. Gao, A.F. Rubin, T.P. Speed, H. Shim, & A.T. Papenfuss, sccomp: Robust differential composition and variability analysis for single-cell data, Proc. Natl. Acad. Sci. U.S.A. 120 (33) e2203828120, https://doi.org/10.1073/pnas.2203828120 (2023).
}
