% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/filter_nonorthologs.R
\name{filter_nonorthologs}
\alias{filter_nonorthologs}
\title{Filter non-orthologs}
\usage{
filter_nonorthologs(
  filenames,
  input_species = NULL,
  convert_nonhuman_genes = TRUE,
  annot_levels = NULL,
  suffix = "_orthologs",
  method = "homologene",
  non121_strategy = "drop_both_species",
  verbose = TRUE,
  ...
)
}
\arguments{
\item{filenames}{List of file names for sct_data saved as \emph{.rda} files.}

\item{input_species}{Which species the gene names in \code{exp} come from.}

\item{convert_nonhuman_genes}{Whether to convert the \code{exp}
row names to human gene names.}

\item{annot_levels}{[Optional] Names of each annotation level.}

\item{suffix}{Suffix to add to the file name (right before \emph{.rda}).}

\item{method}{R package to use for gene mapping:
\describe{
 \item{\code{"gprofiler"}}{Slower but more species and genes.}
 \item{\code{"homologene"}}{Faster but fewer species and genes.}
 \item{\code{"babelgene"}}{Faster but fewer species and genes.
 Also gives consensus scores for each gene mapping based on
 several different data sources.}
}}

\item{non121_strategy}{How to handle genes that don't have
1:1 mappings between \code{input_species}:\code{output_species}.
Options include:\cr
\describe{
\item{\code{"drop_both_species" or "dbs" or 1}}{
Drop genes that have duplicate
mappings in either the \code{input_species} or \code{output_species}
(\emph{DEFAULT}).}
\item{\code{"drop_input_species" or "dis" or 2}}{
Only drop genes that have duplicate
mappings in the \code{input_species}.}
\item{\code{"drop_output_species" or "dos" or 3}}{
Only drop genes that have duplicate
mappings in the \code{output_species}.}
\item{\code{"keep_both_species" or "kbs" or 4}}{
Keep all genes regardless of whether
they have duplicate mappings in either species.}
\item{\code{"keep_popular" or "kp" or 5}}{
Return only the most "popular" interspecies ortholog mappings.
 This procedure tends to yield a greater number of returned genes
 but at the cost of many of them not being true biological 1:1 orthologs.}
 \item{\code{"sum","mean","median","min" or "max"}}{
 When \code{gene_df} is a matrix and \code{gene_output="rownames"},
  these options will aggregate many-to-one gene mappings
  (\code{input_species}-to-\code{output_species})
  after dropping any duplicate genes in the \code{output_species}.
 }
}}

\item{verbose}{Print messages.}

\item{...}{
  Arguments passed on to \code{\link[orthogene:convert_orthologs]{orthogene::convert_orthologs}}
  \describe{
    \item{\code{gene_df}}{Data object containing the genes
(see \code{gene_input} for options on how
the genes can be stored within the object).\cr
Can be one of the following formats:\cr
\describe{
\item{\code{matrix}}{A sparse or dense matrix.}
\item{\code{data.frame}}{A \code{data.frame},
 \code{data.table}, or \code{tibble}.}
\item{\code{list}}{A \code{list} or character \code{vector}.}
}
Genes, transcripts, proteins, SNPs, or genomic ranges
 can be provided in any format
(HGNC, Ensembl, RefSeq, UniProt, etc.) and will be
automatically converted to gene symbols unless
specified otherwise with the \code{...} arguments.\cr
\emph{Note}: If you set \code{method="homologene"}, you
must either supply genes in gene symbol format (e.g. "Sox2")
 OR set \code{standardise_genes=TRUE}.}
    \item{\code{gene_input}}{Which aspect of \code{gene_df} to
get gene names from:\cr
\describe{
\item{\code{"rownames"}}{From row names of data.frame/matrix.}
\item{\code{"colnames"}}{From column names of data.frame/matrix.}
\item{\code{<column name>}}{From a column in \code{gene_df},
 e.g. \code{"gene_names"}.}
}}
    \item{\code{gene_output}}{How to return genes.
Options include:\cr
\describe{
\item{\code{"rownames"}}{As row names of \code{gene_df}.}
\item{\code{"colnames"}}{As column names of \code{gene_df}.}
\item{\code{"columns"}}{As new columns "input_gene", "ortholog_gene"
(and "input_gene_standard" if \code{standardise_genes=TRUE})
in \code{gene_df}.}
\item{\code{"dict"}}{As a dictionary (named list) where the names
are input_gene and the values are ortholog_gene.}
\item{\code{"dict_rev"}}{As a reversed dictionary (named list)
where the names are ortholog_gene and the values are input_gene.}
}}
    \item{\code{standardise_genes}}{If \code{TRUE} AND
\code{gene_output="columns"}, a new column "input_gene_standard"
will be added to \code{gene_df} containing standardised HGNC symbols
identified by \link[gprofiler2]{gorth}.}
    \item{\code{output_species}}{Name of the output species (e.g. "human","chicken").
Use \link[orthogene]{map_species} to return a full list
 of available species.}
    \item{\code{drop_nonorths}}{Drop genes that don't have an ortholog
in the \code{output_species}.}
    \item{\code{agg_fun}}{Aggregation function passed to 
 \link[orthogene]{aggregate_mapped_genes}. 
Set to \code{NULL} to skip aggregation step (default).}
    \item{\code{mthreshold}}{Maximum number of ortholog names per gene to show.
Passed to \link[gprofiler2]{gorth}.
Only used when \code{method="gprofiler"} (\emph{DEFAULT : }\code{Inf}).}
    \item{\code{as_sparse}}{Convert \code{gene_df} to a sparse matrix.
Only works if \code{gene_df} is one of the following classes:\cr
\itemize{
 \item{\code{matrix}}
 \item{\code{Matrix}}
 \item{\code{data.frame}}
 \item{\code{data.table}}
 \item{\code{tibble}}
}
If \code{gene_df} is a sparse matrix to begin with,
it will be returned as a sparse matrix
 (so long as \code{gene_output=} \code{"rownames"} or \code{"colnames"}).}
    \item{\code{as_DelayedArray}}{Convert aggregated matrix to
\link[DelayedArray]{DelayedArray}.}
    \item{\code{sort_rows}}{Sort \code{gene_df} rows alphanumerically.}
    \item{\code{gene_map}}{A \link[base]{data.frame} that maps the current gene names
to new gene names. 
This function's behaviour will adapt to different situations as follows: 
\describe{
\item{\code{gene_map=<data.frame>}}{When a data.frame containing the
gene key:value columns 
(specified by \code{input_col} and \code{output_col}, respectively)
is provided, this will be used to perform aggregation/expansion.}
\item{\code{gene_map=NULL} and \code{input_species!=output_species}}{
A \code{gene_map} is automatically generated by
 \link[orthogene]{map_orthologs} to perform inter-species 
 gene aggregation/expansion.}
\item{\code{gene_map=NULL} and \code{input_species==output_species}}{
A \code{gene_map} is automatically generated by
 \link[orthogene]{map_genes} to perform within-species 
 gene symbol standardization and aggregation/expansion.}
}}
    \item{\code{input_col}}{Column name within \code{gene_map} with gene names matching 
the row names of \code{X}.}
    \item{\code{output_col}}{Column name within \code{gene_map} with gene names
that you wish to map the row names of \code{X} onto.}
  }}
}
\value{
List of the filtered CellTypeData file names.
}
\description{
\code{filter_nonorthologs} takes the filenames of CellTypeData files,
loads them, drops any genes which don't have a 1:1 ortholog with humans,
and then converts the remaining genes to their human orthologs.
The new files are then saved to disk, appending
'_orthologs' to the file name.
}
\details{
\bold{Note:} This function replaces the original
 \code{filter_genes_without_1to1_homolog} function.
\code{filter_genes_without_1to1_homolog} is
now a wrapper for \code{filter_nonorthologs}.
}
\examples{
# Load the single cell data
ctd <- ewceData::ctd()
tmp <- tempfile()
save(ctd, file = tmp)
fNames_ALLCELLS_orths <- EWCE::filter_nonorthologs(filenames = tmp)
}
