% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/nmr_data_analysis.R
\name{random_subsampling}
\alias{random_subsampling}
\title{Random subsampling}
\usage{
random_subsampling(
  sample_idx,
  iterations = 10L,
  test_size = 0.25,
  keep_together = NULL,
  balance_in_train = NULL
)
}
\arguments{
\item{sample_idx}{Typically a numeric vector with the sample indices to be separated.
A character vector with sample IDs could also be used.}

\item{iterations}{An integer, the number of iterations in the random subsampling.}

\item{test_size}{A number between 0 and 1. The fraction of samples to be included in the
test set on each iteration.}

\item{keep_together}{Either \code{NULL} or a factor with the same length as \code{sample_idx}.
\code{keep_together} can be used to ensure that groups of samples are kept
together in all iterations (either in training or in test, but never split).
A typical use case for this is when you have sample replicates and you want
to keep all replicates together to prevent overoptimistic results (having
one sample on the train subset and its replicate on the test subset would
make the prediction easier to guess).
Another use case for this is when you have a longitudinal study and you
want to keep some subjects in the same train or test group, because you
want to use some information in a longitudinal way (e.g. a multilevel PLS-DA model).}

\item{balance_in_train}{Either \code{NULL} or a factor with the same length as \code{sample_idx}.
\code{balance_in_train} can be used to force the training partition, on each
iteration, to contain the same number of samples for each level of the given factor.
For instance, if we have a dataset with 40 samples of class "A" and 20 samples
of class "B", using a \code{test_size = 0.25}, we can force to always have 16
samples of class "A" and 16 samples of class "B" in the training subset.
This is beneficial to those algorithms that require that the training groups
are balanced.}
}
\value{
A list of length equal to \code{iterations}. Each element of the list is
a list with two entries (\code{training} and \code{test}) containing the \code{sample_idx}
values that will belong to each subset.
}
\description{
Randomly splits \code{sample_idx} into training and test subsets, repeating
the split \code{iterations} times.
}
\examples{
random_subsampling(1:100, iterations = 4, test_size = 0.25)

subject_id <- c("Alice", "Bob", "Charlie", "Eve")
random_subsampling(1:4, iterations = 2, test_size = 0.25, keep_together = subject_id)

}
