% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/analysis-functions.R
\name{CIS_grubbs}
\alias{CIS_grubbs}
\title{Grubbs test for Common Insertion Sites (CIS).}
\usage{
CIS_grubbs(
  x,
  genomic_annotation_file = "hg19",
  grubbs_flanking_gene_bp = 1e+05,
  threshold_alpha = 0.05,
  by = NULL,
  return_missing_as_df = TRUE,
  results_as_list = TRUE
)
}
\arguments{
\item{x}{An integration matrix, must include the \code{mandatory_IS_vars()}
columns and the \code{annotation_IS_vars()} columns}

\item{genomic_annotation_file}{Database file for gene annotation,
see details.}

\item{grubbs_flanking_gene_bp}{Number of base pairs flanking a gene}

\item{threshold_alpha}{Significance threshold}

\item{by}{Either \code{NULL} or a character vector of column names. If not
\code{NULL}, the function will perform calculations for each group and return
a list of data frames with the results. E.g. for \code{by = "SubjectID"},
CIS will be computed for each distinct SubjectID found in the table
("SubjectID" column must be included in the input data frame).}

\item{return_missing_as_df}{Should the genes present in the input df
but not in the refgenes be returned as a data frame?}

\item{results_as_list}{If \code{TRUE},
return the group computations as a named list; otherwise, return a single
df with an additional column containing the group id}
}
\value{
A data frame
}
\description{
\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}}
Statistical approach for the validation of common insertion sites
significance based on the comparison of the integration frequency
at the CIS gene with respect to other genes contained in the
surrounding genomic regions. For more details please refer to
this paper:
\url{https://ashpublications.org/blood/article/117/20/5332/21206/Lentiviral-vector-common-integration-sites-in}
}
\details{
\subsection{Genomic annotation file}{

A data frame containing
gene annotations for the specific genome.
From version \verb{1.5.4} the argument \code{genomic_annotation_file} accepts only
data frames or package provided defaults.
The user is responsible for importing the appropriate tabular files if
customization is needed.
The annotations for the human genome (hg19 or hg38) and
murine genome (mm9 or mm10) are already
included in this package: to use one of them just
set the argument \code{genomic_annotation_file} to either \code{"hg19"}, \code{"hg38"},
\code{"mm9"} or \code{"mm10"}.
If for any reason the user is performing an analysis on another genome,
this file needs to be changed respecting the UCSC Genome Browser
format, meaning the input file headers should include:

name2, chrom, strand, min_txStart, max_txEnd, minmax_TxLen, average_TxLen, name, min_cdsStart, max_cdsEnd, minmax_CdsLen, average_CdsLen
}
}
\section{Required tags}{

The function will explicitly check for the presence of these tags:
\itemize{
\item chromosome
\item locus
\item is_strand
\item gene_symbol
\item gene_strand
}
}

\examples{
data("integration_matrices", package = "ISAnalytics")
cis <- CIS_grubbs(integration_matrices)
cis
}
\seealso{
Other Analysis functions: 
\code{\link{HSC_population_size_estimate}()},
\code{\link{compute_abundance}()},
\code{\link{cumulative_is}()},
\code{\link{gene_frequency_fisher}()},
\code{\link{is_sharing}()},
\code{\link{iss_source}()},
\code{\link{sample_statistics}()},
\code{\link{top_integrations}()},
\code{\link{top_targeted_genes}()}
}
\concept{Analysis functions}
