% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/loading_helper_functions.r
\name{load_se_from_tables}
\alias{load_se_from_tables}
\alias{load_se_from_files}
\title{load_se_from_tables}
\usage{
load_se_from_tables(counts_matrix, cell_info_table, gene_info_table = NA,
  group_col_name = "group", cell_col_name = NA)

load_se_from_files(counts_file, cell_info_file, gene_info_file = NA,
  group_col_name = "group", cell_col_name = NA)
}
\arguments{
\item{counts_matrix}{A tab-separated matrix of read counts for each gene
(row) and each cell (column). Columns and rows should be named.}

\item{cell_info_table}{Table of cell information. 
If there is a column labelled
\emph{cell_sample}, that will be used as the unique cell identifiers. 
If not, the first column is assumed to be cell identifiers, and will be 
copied to a new field labelled \emph{cell_sample}.
Similarly - the clusters of these cells should be listed in one column -
which can be called 'group' (case-sensitive) or specified with
\bold{group_col_name}. \emph{Minimal data format: <cell_sample> <group>}}

\item{gene_info_table}{Optional table of gene information. If there is a
column labelled
\emph{ID}, that will be used as the gene identifiers (they must be unique!).
If not, the first column is assumed to be a gene identifier, and will be 
copied to a
new field labelled \emph{ID}. Must match all rownames in 
\bold{counts_matrix}.
If omitted, ID will be generated from the rownames of counts_matrix. 
Default=NA}

\item{group_col_name}{Name of the column in \bold{cell_info_table} 
containing
the cluster/group that each cell belongs to. Case-sensitive. Default='group'}

\item{cell_col_name}{Name of the column in \bold{cell_info_table} containing
a cell id. Ignored if \emph{cell_sample} column is already present. 
If omitted (and there is no \emph{cell_sample} column), the first column will be used.
Case-sensitive. Default=NA}

\item{counts_file}{A tab-separated file of a matrix of read counts. As per 
\bold{counts_matrix}. First column should be gene ID, and top row cell ids.}

\item{cell_info_file}{Tab-separated text file of cell information, as per
\bold{cell_info_table}. Columns must have names.}

\item{gene_info_file}{Optional tab-separated text file of gene information, 
as per \bold{gene_info_table}. Columns must have names. Default=NA}
}
\value{
A SummarizedExperiment object containing the count data, cell info
and gene info.
}
\description{
Create a SummarizedExperiment object (dataset_se) from a count matrix, cell 
information and optionally gene information.

\code{load_se_from_files} is a wrapper for \code{load_se_from_tables} that
will read in tables from specified files.
}
\details{
This function makes a SummarizedExperiment object in a form that
should work for celaref functions. Specifically, that means it will have an
'ID' field for genes (view with \code{rowData(dataset_se)}), and both
'cell_sample' and 'group' fields for cells (view with
\code{colData(dataset_se)}). See parameters for detail.
Additionally, the counts will be an integer matrix (not a
sparse matrix), and the \emph{group} field (but not \emph{cell_sample}
or \emph{ID}) will be a factor.

Note that data will be subsetted to cells present in both the counts matrix
and cell info; this is handy for loading subsets of cells.
However, if gene information (\bold{gene_info_table} or
\bold{gene_info_file}) is provided, all genes must match exactly.

The \code{load_se_from_files} form of this function will run the same 
checks, but will read everything from files in one go. The 
\code{load_se_from_tables}
form is perhaps more useful when the annotations need to be modified (e.g. 
programmatically adding a different gene identifier, renaming groups, 
removing unwanted samples).

Note that the SummarizedExperiment object can also be created without using
these functions; it just needs the \emph{cell_sample}, \emph{ID} and
\emph{group} fields as described above. Sometimes it might be easier
to add these to an existing \emph{SummarizedExperiment} from upstream
analyses.
}
\section{Functions}{
\itemize{
\item \code{load_se_from_files}: To read from files
}}

\examples{

# From data frames (or a matrix for counts) :
demo_se <- load_se_from_tables(counts_matrix=demo_counts_matrix, 
                               cell_info_table=demo_cell_info_table)
demo_se <- load_se_from_tables(counts_matrix=demo_counts_matrix, 
                               cell_info_table=demo_cell_info_table, 
                               gene_info_table=demo_gene_info_table)

# Or from data files : 
counts_filepath    <- system.file("extdata", "sim_query_counts.tab",    package = "celaref")
cell_info_filepath <- system.file("extdata", "sim_query_cell_info.tab", package = "celaref")
gene_info_filepath <- system.file("extdata", "sim_query_gene_info.tab", package = "celaref")

demo_se <- load_se_from_files(counts_file=counts_filepath, cell_info_file=cell_info_filepath)
demo_se <- load_se_from_files(counts_file=counts_filepath, cell_info_file=cell_info_filepath, 
                              gene_info_file=gene_info_filepath )

}
\seealso{
\href{https://bioconductor.org/packages/release/bioc/html/SummarizedExperiment.html}{SummarizedExperiment} for general documentation on SummarizedExperiment objects.

Other Data loading functions: \code{\link{contrast_each_group_to_the_rest_for_norm_ma_with_limma}},
  \code{\link{load_dataset_10Xdata}}
}
\concept{Data loading functions}
\concept{Data-loading functions}
