% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/main.R
\name{run_kmer_spma}
\alias{run_kmer_spma}
\title{\emph{k}-mer-based Spectrum Motif Analysis}
\usage{
run_kmer_spma(
  sorted_transcript_sequences,
  sorted_transcript_values = NULL,
  transcript_values_label = "transcript value",
  motifs = NULL,
  k = 6,
  n_bins = 40,
  midpoint = 0,
  x_value_limits = NULL,
  max_model_degree = 1,
  max_cs_permutations = 1e+07,
  min_cs_permutations = 5000,
  fg_permutations = 5000,
  p_adjust_method = "BH",
  p_combining_method = "fisher",
  n_cores = 1
)
}
\arguments{
\item{sorted_transcript_sequences}{character vector of ranked sequences,
either DNA
(only containing upper case characters A, C, G, T) or RNA (A, C, G, U).
The sequences in \code{sorted_transcript_sequences} must be
ranked (i.e., sorted).
Commonly used sorting criteria are measures of differential expression, such
as fold change or signal-to-noise ratio (e.g., between treatment and control
samples in gene expression profiling experiments).}

\item{sorted_transcript_values}{vector of sorted transcript values, i.e.,
the fold change or signal-to-noise ratio or any other quantity that was used
to sort the transcripts that were passed to \code{run_matrix_spma} or
\code{run_kmer_spma} (default value is \code{NULL}). These values are
displayed as a semi-transparent area over the enrichment value heatmaps
of spectrum plots.}

\item{transcript_values_label}{label of transcript sorting criterion
(e.g., \code{"log fold change"}, default value is \code{"transcript value"}),
only shown if \code{!is.null(sorted_transcript_values)}}

\item{motifs}{a list of motifs that is used to score the specified sequences.
If \code{is.null(motifs)} then all Transite motifs are used.}

\item{k}{length of \emph{k}-mer, either \code{6} for hexamers or
\code{7} for heptamers}

\item{n_bins}{specifies the number of bins into which the sequences
will be divided;
valid values are between 7 and 100}

\item{midpoint}{for enrichment values the midpoint should be \code{1},
for log enrichment values \code{0} (defaults to \code{0})}

\item{x_value_limits}{sets limits of the x-value color scale (used to
harmonize color scales of different spectrum plots), see \code{limits}
argument of \code{\link[ggplot2]{continuous_scale}} (defaults to
\code{NULL}, i.e., the data-dependent default scale range)}

\item{max_model_degree}{maximum degree of polynomial}

\item{max_cs_permutations}{maximum number of permutations performed in
Monte Carlo test for consistency score}

\item{min_cs_permutations}{minimum number of permutations performed in
Monte Carlo test for consistency score}

\item{fg_permutations}{number of foreground permutations}

\item{p_adjust_method}{see \code{\link[stats]{p.adjust}}}

\item{p_combining_method}{one of the following: Fisher (1932)
(\code{"fisher"}), Stouffer (1949),
Liptak (1958) (\code{"SL"}), Mudholkar and George (1979)
(\code{"MG"}), and Tippett (1931)
(\code{"tippett"}) (see \code{\link{p_combine}})}

\item{n_cores}{number of computing cores to use}
}
\value{
A list with the following components:
\tabular{rl}{
  \code{foreground_scores} \tab the result of \code{\link{run_kmer_tsma}}
  for the binned data\cr
  \code{spectrum_info_df} \tab a data frame with the SPMA results\cr
  \code{spectrum_plots} \tab a list of spectrum plots, as generated by
  \code{\link{score_spectrum}}\cr
  \code{classifier_scores} \tab a list of classifier scores, as returned by
  \code{\link{classify_spectrum}}
}
}
\description{
SPMA helps to illuminate the relationship between RBP binding evidence
and the transcript
sorting criterion, e.g., fold change between treatment and control samples.
}
\details{
In order to investigate how motif targets are distributed across a
spectrum of
transcripts (e.g., all transcripts of a platform, ordered by fold change),
Spectrum Motif Analysis visualizes the gradient of RBP binding evidence
across all transcripts.

The \emph{k}-mer-based approach differs from the matrix-based approach by
how the sequences are
scored. Here, sequences are broken into \emph{k}-mers, i.e.,
oligonucleotide sequences of
\emph{k} bases.
Only statistically significantly enriched or depleted \emph{k}-mers
are then used to
calculate a score for each RNA-binding protein, which quantifies its
target overrepresentation.
}
\examples{
# example data set
background_df <- transite:::ge$background_df
# sort sequences by signal-to-noise ratio
background_df <- dplyr::arrange(background_df, value)
# character vector of named and ranked (by signal-to-noise ratio) sequences
background_seqs <- gsub("T", "U", background_df$seq)
names(background_seqs) <- paste0(background_df$refseq, "|",
  background_df$seq_type)

results <- run_kmer_spma(background_seqs,
                         sorted_transcript_values = background_df$value,
                         transcript_values_label = "signal-to-noise ratio",
                         motifs = get_motif_by_id("M178_0.6"),
                         n_bins = 20,
                         fg_permutations = 10)

\dontrun{
results <- run_kmer_spma(background_seqs,
                         sorted_transcript_values = background_df$value,
                         transcript_values_label = "signal-to-noise ratio")}

}
\seealso{
Other SPMA functions: 
\code{\link{classify_spectrum}()},
\code{\link{run_matrix_spma}()},
\code{\link{score_spectrum}()},
\code{\link{subdivide_data}()}

Other \emph{k}-mer functions: 
\code{\link{calculate_kmer_enrichment}()},
\code{\link{check_kmers}()},
\code{\link{compute_kmer_enrichment}()},
\code{\link{count_homopolymer_corrected_kmers}()},
\code{\link{create_kmer_origin_list}()},
\code{\link{draw_volcano_plot}()},
\code{\link{estimate_significance}()},
\code{\link{estimate_significance_core}()},
\code{\link{generate_kmers}()},
\code{\link{generate_permuted_enrichments}()},
\code{\link{run_kmer_tsma}()}
}
\concept{SPMA functions}
\concept{\emph{k}-mer functions}
