suppressMessages(library(lib, character.only = TRUE))
TRUE
}, error = function(e) { FALSE })
if (!success) {
return(lib)
}
})
# Try to attach every package named in `libs`. Each element yields the package
# name when attaching fails and "" when it succeeds. map_chr() requires exactly
# one string per element, so the success branch must return a value instead of
# falling through to NULL.
map_chr(libs, function(lib) {
  attached <- tryCatch({
    suppressMessages(library(lib, character.only = TRUE))
    TRUE
  }, error = function(e) FALSE)
  if (attached) "" else lib
})
# Attach each package in `libs` quietly and record, per package, either its
# name (attach failed) or "" (attach succeeded). The result is printed.
failed_libs <- map_chr(libs, function(pkg) {
  attached <- tryCatch(
    {
      suppressMessages(library(pkg, character.only = TRUE))
      TRUE
    },
    error = function(cond) FALSE
  )
  if (attached) "" else pkg
})
failed_libs
# Collect the names of the packages in `libs` that cannot be attached.
# map_chr() cannot drop elements (a NULL result is an error), so the attach
# outcome is computed as a logical mask and used to subset `libs` instead.
failed_libs <- libs[!map_lgl(libs, function(lib) {
  tryCatch({
    suppressMessages(library(lib, character.only = TRUE))
    TRUE
  }, error = function(e) FALSE)
})]
# Normalize by gene length
tb_nl <-
# CodonProfile S4 object:
# slot:
# species: chr
# gene_meta:
# gene_id: chr vector
# gene_symbol: chr vector
# gene_length: int vector
# codon_observed: int matrix
# codon_expected: float matrix
# codon_o2e: float matrix
# Attach every package named in `libs`, aborting if any of them fails.
#
# libs: character vector of package names.
# Returns NULL invisibly when every package attaches; otherwise signals an
# error via abort() listing the packages that could not be attached.
tryLoadLibs <- function(libs) {
  # TRUE for packages that attach cleanly, FALSE when library() errors.
  loaded <- map_lgl(libs, function(lib) {
    tryCatch({
      suppressMessages(library(lib, character.only = TRUE))
      TRUE
    }, error = function(e) FALSE)
  })
  # Keep only the failures so the length check below is meaningful.
  failed_libs <- libs[!loaded]
  if (length(failed_libs) > 0) {
    # Build the whole message as one string: abort() takes the message as its
    # first argument only.
    abort(paste0("Failed to load the following packages: ",
                 paste0(failed_libs, collapse = ", ")))
  }
}
# Exercise tryLoadLibs() with two package names; presumably neither is
# installed, so this probes the failure path -- TODO confirm.
tryLoadLibs(c("casdfd", "asdf"))
# Normalize by gene length
tb_nl <-
# CodonProfile S4 object:
# slot:
# species: chr
# gene_meta:
# gene_id: chr vector
# gene_symbol: chr vector
# gene_length: int vector
# codon_observed: int matrix
# codon_expected: float matrix
# codon_o2e: float matrix
# Attach every package named in `libs`, aborting if any of them fails.
#
# libs: character vector of package names.
# Returns NULL invisibly when every package attaches; otherwise signals an
# error via abort() listing the packages that could not be attached.
tryLoadLibs <- function(libs) {
  # TRUE for packages that attach cleanly, FALSE when library() errors.
  loaded <- map_lgl(libs, function(lib) {
    tryCatch({
      suppressMessages(library(lib, character.only = TRUE))
      TRUE
    }, error = function(e) FALSE)
  })
  # Only genuine failures are kept; successes must not count towards the
  # length check below.
  failed_libs <- libs[!loaded]
  if (length(failed_libs) > 0) {
    abort(paste0("Failed to load the following packages: ",
                 paste0(failed_libs, collapse = ", ")))
  }
}
# Exercise tryLoadLibs() with two package names; presumably neither is
# installed, so this probes the failure path -- TODO confirm.
tryLoadLibs(c("casdfd", "asdf"))
# Normalize by gene length
tb_nl <-
# CodonProfile S4 object:
# slot:
# species: chr
# gene_meta:
# gene_id: chr vector
# gene_symbol: chr vector
# gene_length: int vector
# codon_observed: int matrix
# codon_expected: float matrix
# codon_o2e: float matrix
# Attach every package named in `libs`, aborting if any of them fails.
#
# libs: character vector of package names.
# Returns NULL invisibly when every package attaches; otherwise signals an
# error via abort() listing the packages that could not be attached.
tryLoadLibs <- function(libs) {
  # The caller's `libs` is used as given (no hard-coded debugging override).
  loaded <- map_lgl(libs, function(lib) {
    tryCatch({
      suppressMessages(library(lib, character.only = TRUE))
      TRUE
    }, error = function(e) FALSE)
  })
  failed_libs <- libs[!loaded]
  if (length(failed_libs) > 0) {
    abort(paste0("Failed to load the following packages: ",
                 paste0(failed_libs, collapse = ", ")))
  }
}
# Exercise tryLoadLibs() with two package names; presumably neither is
# installed, so this probes the failure path -- TODO confirm.
tryLoadLibs(c("casdfd", "asdf"))
# Attach the annotation packages needed for a genome build.
#
# species: genome build identifier, "mm10" or "hg38".
# Delegates to tryLoadLibs(), which aborts when a package cannot be attached.
# Any other `species` value aborts instead of silently doing nothing.
checkLibs <- function(species) {
  libs <- switch(
    species,
    mm10 = c("TxDb.Mmusculus.UCSC.mm10.knownGene",
             "BSgenome.Mmusculus.UCSC.mm10",
             "org.Mm.eg.db"),
    hg38 = c("TxDb.Hsapiens.UCSC.hg38.knownGene",
             "BSgenome.Hsapiens.UCSC.hg38",
             "org.Hs.eg.db"),
    abort(paste0("Unsupported species: ", species))
  )
  tryLoadLibs(libs)
}
# Attach the annotation packages that checkLibs() lists for the mm10 build.
checkLibs("mm10")
# Normalize by gene length
tb_nl <-
# CodonProfile S4 object:
# slot:
# species: chr
# gene_meta:
# gene_id: chr vector
# gene_symbol: chr vector
# gene_length: int vector
# codon_observed: int matrix
# codon_expected: float matrix
# codon_o2e: float matrix
# Attach all packages in `libs`; abort with the list of those that failed.
#
# libs: character vector of package names.
# Returns NULL invisibly when nothing failed.
tryLoadLibs <- function(libs) {
  # Name of the package if it cannot be attached, "" otherwise.
  failure_label <- function(pkg) {
    attached <- tryCatch(
      {
        suppressMessages(library(pkg, character.only = TRUE))
        TRUE
      },
      error = function(cond) FALSE
    )
    if (attached) "" else pkg
  }
  labels <- map_chr(libs, failure_label)
  failed_libs <- labels[nzchar(labels)]
  if (length(failed_libs) > 0) {
    abort(paste0("Failed to load the following packages: ", paste0(failed_libs, collapse = ", ")))
  }
}
# Attach the annotation packages needed for a genome build.
#
# species: genome build identifier, "mm10" or "hg38".
# Delegates to tryLoadLibs(), which aborts when a package cannot be attached.
# Any other `species` value aborts instead of silently doing nothing.
checkLibs <- function(species) {
  libs <- switch(
    species,
    mm10 = c("TxDb.Mmusculus.UCSC.mm10.knownGene",
             "BSgenome.Mmusculus.UCSC.mm10",
             "org.Mm.eg.db"),
    hg38 = c("TxDb.Hsapiens.UCSC.hg38.knownGene",
             "BSgenome.Hsapiens.UCSC.hg38",
             "org.Hs.eg.db"),
    abort(paste0("Unsupported species: ", species))
  )
  tryLoadLibs(libs)
}
# Attach the annotation packages that checkLibs() lists for the mm10 build.
checkLibs("mm10")
# Interactive probes of mapIds(): ENTREZID -> SYMBOL lookups against
# org.Mm.eg.db with multiVals = "CharacterList", first with single keys and
# then with two-element key vectors. Results are only printed, not stored.
# NOTE(review): some keys look like deliberate guesses at unknown IDs to see
# how mapIds() reacts -- confirm.
mapIds(org.Mm.eg.db, keys = "100012", column = "SYMBOL", keytype = "ENTREZID", multiVals = "CharacterList")
mapIds(org.Mm.eg.db, keys = "1004012", column = "SYMBOL", keytype = "ENTREZID", multiVals = "CharacterList")
mapIds(org.Mm.eg.db, keys = "100012", column = "SYMBOL", keytype = "ENTREZID", multiVals = "CharacterList")
mapIds(org.Mm.eg.db, keys = "100010", column = "SYMBOL", keytype = "ENTREZID", multiVals = "CharacterList")
mapIds(org.Mm.eg.db, keys = "500010", column = "SYMBOL", keytype = "ENTREZID", multiVals = "CharacterList")
mapIds(org.Mm.eg.db, keys = c("1", "200010"), column = "SYMBOL", keytype = "ENTREZID", multiVals = "CharacterList")
mapIds(org.Mm.eg.db, keys = c("1", "100010"), column = "SYMBOL", keytype = "ENTREZID", multiVals = "CharacterList")
# Group CDS ranges by gene and print them. `txdb` is not defined in the
# visible part of the file -- presumably a TxDb object; TODO confirm.
cds_by_gene <- cdsBy(txdb, "gene")
cds_by_gene
# Add gene_symbol
# Append a trailing comma to every element of `x`. The operation is
# element-wise: the elements are not collapsed into a single string.
geneAll <- function(x) paste0(x, ",")
# Move the gene annotation columns to the front of the codon table.
tb_codon <- tb_codon %>%
  select(gene_id, gene_symbol, gene_length, everything())
# Look up gene symbols for every gene in `cds_by_gene`. When one ENTREZ ID maps
# to several symbols they are joined into a single comma-separated string;
# `collapse` is required for that (pasting "," onto each element only appends
# a trailing comma and keeps the values separate).
gene_symbol <- mapIds(
  org.Mm.eg.db,
  keys = names(cds_by_gene),
  column = "SYMBOL",
  keytype = "ENTREZID",
  multiVals = function(x) paste0(x, collapse = ",")
)
# Same lookup with the informational messages from mapIds() silenced.
gene_symbol <- suppressMessages(mapIds(
  org.Mm.eg.db,
  keys = names(cds_by_gene),
  column = "SYMBOL",
  keytype = "ENTREZID",
  multiVals = function(x) paste0(x, collapse = ",")
))
gene_symbol
names(gene_symbol)
# Check that the lookup result lines up with the genes, element by element.
names(gene_symbol) == names(cds_by_gene)
cds_by_gene
genes <- extractTranscriptSeqs(bsgenome, cds_by_gene)
# NOTE(review): `single_chr` is only computed further down in this file, so it
# must already exist in the session for this subset to work -- confirm order.
cds_by_gene <- cds_by_gene[single_chr]
# Flag genes whose CDS ranges all sit on at most one chromosome; genes spanning
# several chromosomes are excluded because extractTranscriptSeqs() complains
# about them.
single_chr <- map_lgl(cds_by_gene, function(gene_cds) {
  length(unique(seqnames(gene_cds))) <= 1
})
# Gene symbols per gene; multiple symbols for one ENTREZ ID are joined into a
# single comma-separated string (requires `collapse`).
gene_symbol <- suppressMessages(mapIds(
  org.Mm.eg.db,
  keys = names(cds_by_gene),
  column = "SYMBOL",
  keytype = "ENTREZID",
  multiVals = function(x) paste0(x, collapse = ",")
))
# Extract the spliced CDS sequence of every gene once and reuse it for the
# length calculation instead of extracting a second time.
genes <- extractTranscriptSeqs(bsgenome, cds_by_gene)
genes
gene_length <- width(genes)
gene_length
# Draft of the per-gene metadata table.
tb <- tibble(
  gene_id = names(cds_by_gene),
  gene_symbol = gene_symbol,
  gene_length = gene_length
)
tb
# Gene symbols per gene, joined with "," when an ENTREZ ID has several.
# The argument must be spelled `collapse`: paste0() treats any other named
# argument (such as `collapse.`) as one more string to paste.
gene_symbol <- suppressMessages(mapIds(
  org.Mm.eg.db,
  keys = names(cds_by_gene),
  column = "SYMBOL",
  keytype = "ENTREZID",
  multiVals = function(x) paste0(x, collapse = ",")
))
gene_length <- width(extractTranscriptSeqs(bsgenome, cds_by_gene))
# Preview the metadata table without storing it.
tibble(
  gene_id = names(cds_by_gene),
  gene_symbol = gene_symbol,
  gene_length = gene_length
)
gene_symbol
gene_symbol <- suppressMessages(mapIds(
  org.Mm.eg.db,
  keys = names(cds_by_gene),
  column = "SYMBOL",
  keytype = "ENTREZID",
  multiVals = function(x) paste0(x, collapse = ",")
))
gene_symbol
# Same lookup, coerced with as.character() before being stored in the table.
gene_symbol <- suppressMessages(mapIds(
  org.Mm.eg.db,
  keys = names(cds_by_gene),
  column = "SYMBOL",
  keytype = "ENTREZID",
  multiVals = function(x) paste0(x, collapse = ",")
)) %>%
  as.character()
gene_symbol
tb_gene_meta <- tibble(
  gene_id = names(cds_by_gene),
  gene_symbol = gene_symbol,
  gene_length = gene_length
)
# Helper3: obtain gene meta info
#
# Builds a per-gene metadata table for a genome build from the matching TxDb,
# BSgenome and org.*.eg.db objects, which are referenced by name and therefore
# must already be attached.
#
# species: genome build identifier, "mm10" or "hg38"; anything else aborts.
# Returns a tibble with columns gene_id, gene_symbol and gene_length.
getGeneMeta <- function(species) {
  if (species == "mm10") {
    txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene
    bsgenome <- BSgenome.Mmusculus.UCSC.mm10
    org <- org.Mm.eg.db
  } else if (species == "hg38") {
    txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
    bsgenome <- BSgenome.Hsapiens.UCSC.hg38
    org <- org.Hs.eg.db
  } else {
    # Fail here rather than later with "object 'txdb' not found".
    abort(paste0("Unsupported species: ", species))
  }
  cds_by_gene <- cdsBy(txdb, "gene")
  # Genes spanning multiple chromosomes are filtered out, otherwise
  # extractTranscriptSeqs() complains.
  single_chr <- map_lgl(cds_by_gene, function(cds) {
    length(unique(seqnames(cds))) <= 1
  })
  cds_by_gene <- cds_by_gene[single_chr]
  # Use the organism database selected above (not a fixed mouse database);
  # several symbols for one ENTREZ ID are joined with ",".
  gene_symbol <- suppressMessages(mapIds(
    org,
    keys = names(cds_by_gene),
    column = "SYMBOL",
    keytype = "ENTREZID",
    multiVals = function(x) paste0(x, collapse = ",")
  )) %>%
    as.character()
  gene_length <- width(extractTranscriptSeqs(bsgenome, cds_by_gene))
  tibble(
    gene_id = names(cds_by_gene),
    gene_symbol = gene_symbol,
    gene_length = gene_length
  )
}
# Build the gene metadata table for the mm10 build and print it.
tb <- getGeneMeta("mm10")
tb
# Constructor
# Declares the S4 generic CodonProfile(species, filter); `filter` defaults to
# the choice vector c("remove", "trim").
setGeneric(
  "CodonProfile",
  function(species, filter = c("remove", "trim")) standardGeneric("CodonProfile")
)
# Print the current metadata table and the per-gene CDS ranges for inspection.
tb_gene_meta
cds_by_gene
# Helper3: obtain gene meta info
#
# Builds a per-gene metadata table for a genome build from the matching TxDb,
# BSgenome and org.*.eg.db objects, which are referenced by name and therefore
# must already be attached.
#
# species: genome build identifier, "mm10" or "hg38"; anything else aborts.
# Returns a tibble with columns gene_id, gene_symbol, gene_length and
# cds_by_gene (a list column holding each gene's CDS ranges).
getGeneMeta <- function(species) {
  if (species == "mm10") {
    txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene
    bsgenome <- BSgenome.Mmusculus.UCSC.mm10
    org <- org.Mm.eg.db
  } else if (species == "hg38") {
    txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
    bsgenome <- BSgenome.Hsapiens.UCSC.hg38
    org <- org.Hs.eg.db
  } else {
    # Fail here rather than later with "object 'txdb' not found".
    abort(paste0("Unsupported species: ", species))
  }
  cds_by_gene <- cdsBy(txdb, "gene")
  # Genes spanning multiple chromosomes are filtered out, otherwise
  # extractTranscriptSeqs() complains.
  single_chr <- map_lgl(cds_by_gene, function(cds) {
    length(unique(seqnames(cds))) <= 1
  })
  cds_by_gene <- cds_by_gene[single_chr]
  # Use the organism database selected above (not a fixed mouse database);
  # several symbols for one ENTREZ ID are joined with ",".
  gene_symbol <- suppressMessages(mapIds(
    org,
    keys = names(cds_by_gene),
    column = "SYMBOL",
    keytype = "ENTREZID",
    multiVals = function(x) paste0(x, collapse = ",")
  )) %>%
    as.character()
  gene_length <- width(extractTranscriptSeqs(bsgenome, cds_by_gene))
  tibble(
    gene_id = names(cds_by_gene),
    gene_symbol = gene_symbol,
    gene_length = gene_length,
    # NOTE(review): tibble() only accepts vector-like columns; the S4 range
    # list is stored as a plain list column (one element per gene) on the
    # assumption that the raw object is rejected -- confirm.
    cds_by_gene = as.list(cds_by_gene)
  )
}
# Choose the genome build before it is used, so getGeneMeta() receives a
# defined value; then build the metadata table and inspect the CDS ranges.
species <- "mm10"
species
tb_gene_meta <- getGeneMeta(species)
cds_by_gene
# Helper3: obtain gene meta info
#
# Builds a per-gene metadata table for a genome build from the matching TxDb,
# BSgenome and org.*.eg.db objects, which are referenced by name and therefore
# must already be attached.
#
# species: genome build identifier, "mm10" or "hg38"; anything else aborts.
# Returns a tibble with columns gene_id, gene_symbol, gene_length and gene_seq
# (the extracted CDS sequence of each gene as a character string).
getGeneMeta <- function(species) {
  if (species == "mm10") {
    txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene
    bsgenome <- BSgenome.Mmusculus.UCSC.mm10
    org <- org.Mm.eg.db
  } else if (species == "hg38") {
    txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
    bsgenome <- BSgenome.Hsapiens.UCSC.hg38
    org <- org.Hs.eg.db
  } else {
    # Fail here rather than later with "object 'txdb' not found".
    abort(paste0("Unsupported species: ", species))
  }
  cds_by_gene <- cdsBy(txdb, "gene")
  # Genes spanning multiple chromosomes are filtered out, otherwise
  # extractTranscriptSeqs() complains.
  single_chr <- map_lgl(cds_by_gene, function(cds) {
    length(unique(seqnames(cds))) <= 1
  })
  cds_by_gene <- cds_by_gene[single_chr]
  # Use the organism database selected above (not a fixed mouse database);
  # several symbols for one ENTREZ ID are joined with ",".
  gene_symbol <- suppressMessages(mapIds(
    org,
    keys = names(cds_by_gene),
    column = "SYMBOL",
    keytype = "ENTREZID",
    multiVals = function(x) paste0(x, collapse = ",")
  )) %>%
    as.character()
  gene_seq <- extractTranscriptSeqs(bsgenome, cds_by_gene)
  gene_length <- width(gene_seq)
  tibble(
    gene_id = names(cds_by_gene),
    gene_symbol = gene_symbol,
    gene_length = gene_length,
    # NOTE(review): tibble() only accepts vector-like columns; the sequences
    # are stored as plain strings on the assumption that the raw sequence set
    # is rejected -- confirm.
    gene_seq = as.character(gene_seq)
  )
}
# Rebuild the metadata table with the current getGeneMeta() definition.
tb_gene_meta <- getGeneMeta(species)
# Inspect the extracted sequences of the first ten genes, first as returned by
# extractTranscriptSeqs() and then coerced with as.character().
extractTranscriptSeqs(bsgenome, cds_by_gene[1:10])
as.character(extractTranscriptSeqs(bsgenome, cds_by_gene[1:10]))
# Helper3: obtain gene meta info
#
# Builds a per-gene metadata table for a genome build from the matching TxDb,
# BSgenome and org.*.eg.db objects, which are referenced by name and therefore
# must already be attached.
#
# species: genome build identifier, "mm10" or "hg38"; anything else aborts.
# Returns a tibble with columns gene_id, gene_symbol, gene_length and gene_seq
# (the extracted CDS sequence of each gene as a character string).
getGeneMeta <- function(species) {
  if (species == "mm10") {
    txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene
    bsgenome <- BSgenome.Mmusculus.UCSC.mm10
    org <- org.Mm.eg.db
  } else if (species == "hg38") {
    txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
    bsgenome <- BSgenome.Hsapiens.UCSC.hg38
    org <- org.Hs.eg.db
  } else {
    # Fail here rather than later with "object 'txdb' not found".
    abort(paste0("Unsupported species: ", species))
  }
  cds_by_gene <- cdsBy(txdb, "gene")
  # Genes spanning multiple chromosomes are filtered out, otherwise
  # extractTranscriptSeqs() complains.
  single_chr <- map_lgl(cds_by_gene, function(cds) {
    length(unique(seqnames(cds))) <= 1
  })
  cds_by_gene <- cds_by_gene[single_chr]
  # Use the organism database selected above (not a fixed mouse database);
  # several symbols for one ENTREZ ID are joined with ",".
  gene_symbol <- suppressMessages(mapIds(
    org,
    keys = names(cds_by_gene),
    column = "SYMBOL",
    keytype = "ENTREZID",
    multiVals = function(x) paste0(x, collapse = ",")
  )) %>%
    as.character()
  # Measure the lengths on the extracted sequence object, then convert the
  # sequences to plain strings for storage in the table.
  cds_seqs <- extractTranscriptSeqs(bsgenome, cds_by_gene)
  gene_length <- width(cds_seqs)
  gene_seq <- as.character(cds_seqs)
  tibble(
    gene_id = names(cds_by_gene),
    gene_symbol = gene_symbol,
    gene_length = gene_length,
    gene_seq = gene_seq
  )
}
# Rebuild the gene metadata table for the build held in `species` and print it.
tb_gene_meta <- getGeneMeta(species)
tb_gene_meta
# Helper4: get codon usage
#
# Counts how often each codon named in GENETIC_CODE occurs in one sequence,
# reading non-overlapping triplets from the first position. Trailing bases
# that do not fill a complete codon are ignored.
#
# seq:     a single sequence; coerced to an upper-case character string.
# gene_id: identifier stored in the `gene_id` column of every output row.
# Returns a tibble with columns codon, freq and gene_id, one row per codon in
# names(GENETIC_CODE); codons that do not occur have freq 0.
getCodonUsage <- function(seq, gene_id) {
  seq <- toupper(as.character(seq))
  n_codons <- nchar(seq) %/% 3L
  if (n_codons > 0) {
    starts <- 3L * seq_len(n_codons) - 2L
    codons <- str_sub(seq, starts, starts + 2L)
  } else {
    # Sequences shorter than one codon contribute no counts (building the
    # start positions with seq() would fail for them).
    codons <- character(0)
  }
  # factor() with fixed levels keeps absent codons at zero and drops triplets
  # that are not in GENETIC_CODE.
  codon_count <- table(factor(codons, levels = names(GENETIC_CODE)))
  tb <- as_tibble(codon_count, .name_repair = ~ c("codon", "freq"))
  tb$gene_id <- gene_id
  tb
}
# Count codons for every gene. Genes whose CDS length is not a multiple of 3
# are tallied in `filter_c`; they are still counted when `filter` is "trim"
# and dropped otherwise.
# NOTE(review): `filter` is not defined in the visible part of the file --
# presumably the "remove"/"trim" mode string; confirm.
filter_c <- 0  # start the tally from zero before each run
codon_usage <- map2_df(tb_gene_meta$gene_seq, tb_gene_meta$gene_id, function(cds_seq, gene_id) {
  cds_length <- nchar(cds_seq)
  # Test the length itself, not the number of digits in the length.
  if (cds_length %% 3 != 0) {
    filter_c <<- filter_c + 1
    if (filter == "trim") {
      return(getCodonUsage(cds_seq, gene_id))
    }
    # Not trimming: contribute no rows for this gene.
    return(NULL)
  }
  getCodonUsage(cds_seq, gene_id)
})
codon_usage
# One row per gene, one column per codon.
tb_codon <- codon_usage %>% pivot_wider(names_from = c(codon), values_from = freq)
tb_codon
# Preview with the columns in alphabetical order (result is not stored).
tb_codon[, order(colnames(tb_codon))]
inform(paste0("Getting codon counts: finished! ", filter_c, " genes remove!"))
# Count codons for every gene. Genes whose CDS length is not a multiple of 3
# are tallied in `filter_c`; they are still counted when `filter` is "trim"
# and dropped otherwise.
# NOTE(review): `filter` is not defined in the visible part of the file --
# presumably the "remove"/"trim" mode string; confirm.
filter_c <- 0
codon_usage <- map2_df(tb_gene_meta$gene_seq, tb_gene_meta$gene_id, function(cds_seq, gene_id) {
  cds_length <- nchar(cds_seq)
  # Test the length itself, not the number of digits in the length.
  if (cds_length %% 3 != 0) {
    filter_c <<- filter_c + 1
    if (filter == "trim") {
      return(getCodonUsage(cds_seq, gene_id))
    }
    # Not trimming: contribute no rows for this gene.
    return(NULL)
  }
  getCodonUsage(cds_seq, gene_id)
})
filter_c
inform(paste0("Getting codon counts: finished! ", filter_c, " genes remove!"))
dim(tb_gene_meta)
dim(tb_gene_meta)[[1]]
# Report the tally relative to the total number of genes (rows).
inform(paste0("Getting codon counts: finished! ", filter_c, "/", dim(tb_gene_meta)[[1]], " genes remove!"))
tb_codon
colnames(tb_codon)
tb_codon
# Rows of the metadata table for genes that made it into the codon table.
# The trailing comma matters: without it the logical mask would be applied to
# the columns instead of the rows.
tb_gene_meta[tb_gene_meta$gene_id %in% tb_codon$gene_id, ]
# Total CDS length over the genes present in the codon table.
genome_size <- sum(tb_gene_meta$gene_length[tb_gene_meta$gene_id %in% tb_codon$gene_id])
genome_size
tb_codon$TTT
sum(tb_codon$TTT)
colnames(tb_codon)
# Total count of each codon across all genes: one row per codon column of
# `tb_codon`, skipping the `gene_id` column.
codon_t <-
  map_df(colnames(tb_codon), function(codon) {
    if (codon != "gene_id") {
      # `[[codon]]` selects the column named by the variable; `$codon` would
      # look for a column literally called "codon".
      c <- sum(tb_codon[[codon]])
      return(tibble(
        codon = codon,
        count = c
      ))
    }
  })
warnings()
# Total count of each codon across all genes: one row per codon column of
# `tb_codon`; the `gene_id` column contributes an empty tibble (no rows).
codon_t <-
  map_df(colnames(tb_codon), function(codon) {
    if (codon != "gene_id") {
      # `[[codon]]` selects the column named by the variable; `$codon` would
      # look for a column literally called "codon".
      c <- sum(tb_codon[[codon]])
      return(tibble(
        codon = codon,
        count = c
      ))
    } else {
      return(tibble())
    }
  })
codon_t
# Total count of each codon across all genes: one row per codon column of
# `tb_codon`, skipping the `gene_id` column.
codon_t <- map_df(colnames(tb_codon), function(codon) {
  if (codon != "gene_id") {
    # `[[codon]]` selects the column named by the variable; `$codon` would
    # look for a column literally called "codon".
    c <- sum(tb_codon[[codon]])
    return(tibble(
      codon = codon,
      count = c
    ))
  }
})
warnings()
# Total count of each codon across all genes: one row per codon column of
# `tb_codon`, skipping the `gene_id` column. In this variant the codon name is
# stored in a column called `test`.
codon_t <- map_df(colnames(tb_codon), function(codon) {
  if (codon != "gene_id") {
    # `[[codon]]` selects the column named by the variable; `$codon` would
    # look for a column literally called "codon".
    c <- sum(tb_codon[[codon]])
    return(tibble(
      test = codon,
      count = c
    ))
  }
})
warnings()
# Sum every codon column of `tb_codon` over all genes. The result has one row
# per codon, with the codon name in `test` and its total in `count`; the
# `gene_id` column is skipped.
codon_columns <- colnames(tb_codon)
codon_t <- map_df(codon_columns[codon_columns != "gene_id"], function(col_name) {
  tibble(test = col_name, count = sum(tb_codon[[col_name]]))
})
codon_t
# Sum every codon column of `tb_codon` over all genes. The result has one row
# per codon, with the codon name in `codon` and its total in `count`; the
# `gene_id` column is skipped.
codon_t <- map_df(
  colnames(tb_codon)[colnames(tb_codon) != "gene_id"],
  function(col_name) {
    tibble(codon = col_name, count = sum(tb_codon[[col_name]]))
  }
)
codon_t
# Open the help page for saveRDS().
?saveRDS
