remotes::install_version("VGAM", version = "1.1-3", repos = "http://cran.r-project.org")
remotes::install_version("VGAM", version = "1.1-5", repos = "http://cran.r-project.org")
remotes::install_version("VGAM", version = "1.1-8", repos = "http://cran.r-project.org")
install.packages("VGAM")
plot_multiple_branches_heatmap2 <- function(
cds = NULL,
branches,
branches_name = NULL,
cluster_rows = TRUE,
hclust_method = "ward.D2",
num_clusters = 6,
hmcols = NULL,
add_annotation_row = NULL,
add_annotation_col = NULL,
show_rownames = FALSE,
use_gene_short_name = TRUE,
norm_method = c("vstExprs", "log"),
scale_max = 3,
scale_min = -3,
trend_formula = "~sm.ns(Pseudotime, df=3)",
return_heatmap = FALSE,
cores = 1) {
pseudocount <- 1
if (!(all(branches %in% Biobase::pData(cds)$State)) &
length(branches) == 1) {
stop(
"This function only allows to make multiple branch plots
where branches is included in the pData"
)
}
if (!requireNamespace("Biobase", quietly = TRUE)) {
stop("Package 'Biobase' is required. Please install it.")
}
branch_label <- branches
if (!is.null(branches_name)) {
if (length(branches) != length(branches_name)) {
stop("branches_name should have the same length as branches")
}
branch_label <- branches_name
}
# test whether or not the states passed to branches are true branches
# (not truncks) or there are terminal cells
g <- cds@minSpanningTree
# g <- igraph::upgrade_graph(g)
m <- NULL
# branche_cell_num <- c()
for (branch_in in branches) {
branches_cells <- row.names(subset(Biobase::pData(cds),
State == branch_in))
root_state <- subset(Biobase::pData(cds), Pseudotime == 0)[, "State"]
root_state_cells <- row.names(subset(Biobase::pData(cds),
State == root_state))
if (cds@dim_reduce_type != "ICA") {
root_state_cells <- unique(
paste(
"Y_",
cds@auxOrderingData$DDRTree$pr_graph_cell_proj_closest_vertex[
root_state_cells, ],
sep = ""
)
)
branches_cells <- unique(
paste(
"Y_",
cds@auxOrderingData$DDRTree$pr_graph_cell_proj_closest_vertex[
branches_cells, ],
sep = ""
)
)
}
root_cell <- root_state_cells[which(
igraph::degree(g, v = root_state_cells) == 1)]
tip_cell <- branches_cells[
which(igraph::degree(g, v = branches_cells) == 1)]
traverse_res <- traverseTree(g, root_cell, tip_cell)
path_cells <- names(traverse_res$shortest_path[[1]])
if (cds@dim_reduce_type != "ICA") {
pc_ind <- cds@auxOrderingData$DDRTree$pr_graph_cell_proj_closest_vertex
path_cells <- row.names(pc_ind)[
paste("Y_", pc_ind[, 1], sep = "") %in% path_cells]
}
cds_subset <- cds[, path_cells]
newdata <- data.frame(
Pseudotime =
seq(0, max(
Biobase::pData(cds_subset)$Pseudotime
), length.out = 100)
)
if (requireNamespace("monocle", quietly = TRUE)) {
tmp <- monocle::genSmoothCurves(
cds_subset,
cores = cores,
trend_formula = trend_formula,
relative_expr = TRUE,
new_data = newdata
)
} else {
warning("Cannot find monocle 'monocle' is not installed.")
}
if (is.null(m)) {
m <- tmp
} else {
m <- cbind(m, tmp)
}
}
# remove genes with no expression in any condition
m <- m[!apply(m, 1, sum) == 0, ]
norm_method <- match.arg(norm_method)
# FIXME: this needs to check that vst values can even be computed.
# (They can only be if we're using NB as the expressionFamily)
if (requireNamespace("monocle", quietly = TRUE)) {
if (norm_method == "vstExprs" && is.null(
cds@dispFitInfo[["blind"]]$disp_func) == FALSE) {
m <- monocle::vstExprs(cds, expr_matrix = m)
} else if (norm_method == "log") {
m <- log10(m + pseudocount)
}
} else {
warning("Cannot find monocle 'monocle' is not installed.")
}
# Row-center the data.
m <- m[!apply(m, 1, sd) == 0, ]
m <- Matrix::t(scale(Matrix::t(m), center = TRUE))
m <- m[is.na(row.names(m)) == FALSE, ]
m[is.nan(m)] <- 0
m[m > scale_max] <- scale_max
m[m < scale_min] <- scale_min
heatmap_matrix <- m
row_dist <- as.dist((1 - cor(Matrix::t(heatmap_matrix))) / 2)
row_dist[is.na(row_dist)] <- 1
if (is.null(hmcols)) {
bks <- seq(-3.1, 3.1, by = 0.1)
hmcols <- colorRamps::blue2green2red(length(bks) - 1)
} else {
bks <- seq(-3.1, 3.1, length.out = length(hmcols))
}
if (requireNamespace("pheatmap", quietly = TRUE)) {
ph <- pheatmap::pheatmap(
heatmap_matrix,
useRaster = TRUE,
cluster_cols = FALSE,
cluster_rows = TRUE,
show_rownames = FALSE,
show_colnames = FALSE,
clustering_distance_rows = row_dist,
clustering_method = hclust_method,
cutree_rows = num_clusters,
silent = TRUE,
filename = NA,
breaks = bks,
color = hmcols
)
} else {
warning("Cannot create heatmap. 'pheatmap' is not installed.")
}
annotation_col <- data.frame(
Branch =
factor(rep(rep(
branch_label,
each = 100
)))
)
annotation_row <- data.frame(
Cluster =
factor(cutree(ph$tree_row, num_clusters))
)
col_gaps_ind <- c(seq_len((length(branches) - 1))) * 100
if (!is.null(add_annotation_row)) {
old_colnames_length <- ncol(annotation_row)
annotation_row <- cbind(
annotation_row, add_annotation_row[row.names(annotation_row), ])
colnames(annotation_row)[(old_colnames_length + 1):ncol(annotation_row)] <-
colnames(add_annotation_row)
# annotation_row$bif_time <- add_annotation_row[as.character(
# Biobase::fData(absolute_cds[row.names(annotation_row), ]
# )$gene_short_name), 1]
}
if (use_gene_short_name == TRUE) {
if (is.null(Biobase::fData(cds)$gene_short_name) == FALSE) {
feature_label <- as.character(Biobase::fData(cds)[
row.names(heatmap_matrix), "gene_short_name"])
feature_label[is.na(feature_label)] <- row.names(heatmap_matrix)
row_ann_labels <- as.character(Biobase::fData(cds)[
row.names(annotation_row), "gene_short_name"])
row_ann_labels[is.na(row_ann_labels)] <- row.names(annotation_row)
} else {
feature_label <- row.names(heatmap_matrix)
row_ann_labels <- row.names(annotation_row)
}
} else {
feature_label <- row.names(heatmap_matrix)
row_ann_labels <- row.names(annotation_row)
}
row.names(heatmap_matrix) <- feature_label
row.names(annotation_row) <- row_ann_labels
colnames(heatmap_matrix) <- c(seq_len(ncol(heatmap_matrix)))
if (!(cluster_rows)) {
annotation_row <- NA
}
if (requireNamespace("pheatmap", quietly = TRUE)) {
ph_res <- pheatmap::pheatmap(
heatmap_matrix[, ],
# ph$tree_row$order
useRaster = TRUE,
cluster_cols = FALSE,
cluster_rows = cluster_rows,
show_rownames = show_rownames,
show_colnames = FALSE,
# scale="row",
clustering_distance_rows = row_dist,
# row_dist
clustering_method = hclust_method,
# ward.D2
cutree_rows = num_clusters,
# cutree_cols = 2,
annotation_row = annotation_row,
annotation_col = annotation_col,
gaps_col = col_gaps_ind,
treeheight_row = 20,
breaks = bks,
fontsize = 12,
color = hmcols,
silent = TRUE,
border_color = NA,
filename = NA
)
} else {
warning("Cannot create heatmap. 'pheatmap' is not installed.")
}
# ============================================================================
# prepare data
wide <- cbind(heatmap_matrix, annotation_row) |>
data.frame(check.names = FALSE)
wide.res <- wide |>
dplyr::mutate(gene = rownames(wide), .before = 1) |>
dplyr::rename(cluster = Cluster)
# wide to long
df <- reshape2::melt(
wide.res,
id.vars = c("cluster", "gene"),
variable.name = "cell_type",
value.name = "norm_value"
) |>
dplyr::mutate(cell_type = as.numeric(as.character(cell_type)))
# add cluster name
df$cluster_name <- paste("cluster ", df$cluster, sep = "")
# add gene number
cl.info <- data.frame(table(wide.res$cluster)) |>
dplyr::mutate(Var1 = as.numeric(as.character(Var1))) |>
dplyr::arrange(Var1)
id <- unique(df$cluster_name)
purrr::map_df(seq_along(id), function(x) {
tmp <- df |>
dplyr::filter(cluster_name == id[x])
tmp |>
dplyr::mutate(cluster_name = paste(cluster_name,
" (", cl.info$Freq[x], ")", sep = ""))
}) -> df
# cluster order
df$cluster_name <- factor(df$cluster_name,
levels = paste("cluster ",
cl.info$Var1,
" (", cl.info$Freq, ")", sep = "")
)
# return
prepared_data <- list(
wide.res = wide.res,
long.res = df,
type = "monocle",
geneMode = "all",
geneType = "branched",
pseudotime = annotation_col$Branch
)
if (return_heatmap == TRUE) {
grid::grid.rect(gp = grid::gpar("fill", col = NA))
grid::grid.draw(ph_res$gtable)
return(ph_res)
} else {
return(prepared_data)
}
}
traverseTree <- function(g, starting_cell, end_cells){
distance <- shortest.paths(g, v=starting_cell, to=end_cells)
branchPoints <- which(degree(g) == 3)
path <- shortest_paths(g, from = starting_cell, end_cells)
return(list(shortest_path = path$vpath, distance = distance, branch_points = intersect(branchPoints, unlist(path$vpath))))
}
# remotes::install_version("VGAM", version = "1.1-8", repos = "http://cran.r-project.org")
library(ClusterGVis)
# data("HSMM")
data("BEAM_res")
tmp <- subset(BEAM_res,qval < 1e-4)
rownames(HSMM) %in% rownames(tmp)
load("../../../ClusterGVis-0.1.2/data/HSMM.rda")
library(monocle)
library(igraph)
# return plot
plot_multiple_branches_heatmap2(HSMM[row.names(subset(BEAM_res,qval < 1e-4)),],
branches = c(1,3,4,5),
num_clusters = 4,
cores = 1,
use_gene_short_name = T,
show_rownames = T,
return_heatmap = T)
traverseTree <- function(g, starting_cell, end_cells){
distance <- distances(g, v=starting_cell, to=end_cells)
branchPoints <- which(degree(g) == 3)
path <- shortest_paths(g, from = starting_cell, end_cells)
return(list(shortest_path = path$vpath, distance = distance, branch_points = intersect(branchPoints, unlist(path$vpath))))
}
# return plot
plot_multiple_branches_heatmap2(HSMM[row.names(subset(BEAM_res,qval < 1e-4)),],
branches = c(1,3,4,5),
num_clusters = 4,
cores = 1,
use_gene_short_name = T,
show_rownames = T,
return_heatmap = T)
traverseTree <- function(g, starting_cell, end_cells){
distance <- distances(g, v=starting_cell, to=end_cells)
branchPoints <- which(degree(g) == 3)
path <- distances(g, from = starting_cell, end_cells)
return(list(shortest_path = path$vpath, distance = distance, branch_points = intersect(branchPoints, unlist(path$vpath))))
}
# return plot
plot_multiple_branches_heatmap2(HSMM[row.names(subset(BEAM_res,qval < 1e-4)),],
branches = c(1,3,4,5),
num_clusters = 4,
cores = 1,
use_gene_short_name = T,
show_rownames = T,
return_heatmap = T)
traverseTree <- function(g, starting_cell, end_cells){
distance <- shortest.paths(g, v=starting_cell, to=end_cells)
branchPoints <- which(degree(g) == 3)
path <- shortest_paths(g, from = starting_cell, end_cells)
return(list(shortest_path = path$vpath, distance = distance, branch_points = intersect(branchPoints, unlist(path$vpath))))
}
# return plot
plot_multiple_branches_heatmap2(HSMM[row.names(subset(BEAM_res,qval < 1e-4)),],
branches = c(1,3,4,5),
num_clusters = 4,
cores = 1,
use_gene_short_name = T,
show_rownames = T,
return_heatmap = T)
plot_multiple_branches_heatmap2(HSMM[row.names(BEAM_res),],
branches = c(1,3,4,5),
num_clusters = 4,
cores = 1,
use_gene_short_name = T,
show_rownames = T,
return_heatmap = T)
# return plot
plot_multiple_branches_heatmap2(HSMM[row.names(subset(BEAM_res,qval < 0.05)),],
branches = c(1,3,4,5),
num_clusters = 4,
cores = 1,
use_gene_short_name = T,
show_rownames = T,
return_heatmap = T)
tmp <- subset(BEAM_res,qval < 0.05)
View(tmp)
gc()
data("HSMM")
HSMM[row.names(subset(BEAM_res,qval < 0.05)),]
plot_multiple_branches_heatmap2(HSMM,
branches = c(1,3,4,5),
num_clusters = 4,
cores = 1,
use_gene_short_name = T,
show_rownames = T,
return_heatmap = F)
plot_multiple_branches_heatmap2(HSMM,
branches = c(1,3),
num_clusters = 4,
cores = 1,
use_gene_short_name = T,
show_rownames = T,
return_heatmap = F)
plot_multiple_branches_heatmap2(HSMM,
branches = c(1),
num_clusters = 4,
cores = 1,
use_gene_short_name = T,
show_rownames = T,
return_heatmap = F)
plot_multiple_branches_heatmap2(HSMM,
branches = c(2),
num_clusters = 4,
cores = 1,
use_gene_short_name = T,
show_rownames = T,
return_heatmap = F)
library(devtools)
document()
check()
document()
check()
document()
check()
document()
check()
library(biocViews)
BiocManager::install("BiocCheck")
library(BiocCheck)
BiocCheck()
document()
BiocCheck()
BiocCheckGitClone()
document()
check()
BiocCheckGitClone()
document()
library(devtools)
document()
document()
check()
BiocManager::install("org.Mm.eg.db")
install.packages("C:/Users/JunJun/Desktop/org.Mm.eg.db_3.22.0.tar.gz", repos = NULL, type = "source")
BiocManager::install("TCseq")
document()
check()
BiocManager::install("clusterProfiler")
document()
check()
BiocManager::version()
suppressPackageStartupMessages(library(SummarizedExperiment))
suppressPackageStartupMessages(library(S4Vectors))
library(ClusterGVis)
# a data.frame or SummarizedExperiment object
data("exps")
head(exps)
# check suitable cluster nmbers
getClusters(obj = exps)
# using kemans for clustering
ck <- clusterData(obj = exps,
clusterMethod = "kmeans",
clusterNum = 8)
# construct a SummarizedExperiment object
sce <- SummarizedExperiment(assays = list(counts = exps),
colData = S4Vectors::DataFrame(
sample = colnames(exps),
row.names = colnames(exps))
)
sce
# using kemans for clustering
ck2 <- clusterData(obj = sce,
clusterMethod = "kmeans",
clusterNum = 8)
# plot line only
visCluster(object = ck,
plotType = "line")
# plot heatmap only
visCluster(object = ck,
plotType = "heatmap")
# plot heatmap only
visCluster(object = ck,
plotType = "both")
suppressPackageStartupMessages(library(Seurat))
data("pbmc_subset")
# find markers for every cluster compared to all remaining cells
# report only the positive ones
pbmc.markers.all <- Seurat::FindAllMarkers(pbmc_subset,
only.pos = TRUE,
min.pct = 0.25,
logfc.threshold = 0.25)
# get top 10 genes
pbmc.markers <- pbmc.markers.all |>
dplyr::group_by(cluster) |>
dplyr::top_n(n = 20, wt = avg_log2FC)
# check
head(pbmc.markers)
# prepare data from seurat object
st.data <- prepareDataFromscRNA(object = pbmc_subset,
diffData = pbmc.markers,
showAverage = TRUE)
# check
str(st.data)
# line plot
visCluster(object = st.data,
plotType = "line")
# add gene name
markGenes <- unique(pbmc.markers$gene)[
sample(1:length(unique(pbmc.markers$gene)),40,replace = FALSE)]
# heatmap plot
# pdf('sc1.pdf',height = 10,width = 6,onefile = FALSE)
p <- visCluster(object = st.data,
plotType = "heatmap",
column_names_rot = 45,
markGenes = markGenes,
clusterOrder = c(1:9))
# dev.off()
library(Seurat)
data("pbmc_subset")
# transform into SingleCellExperiment 
sce <- as.SingleCellExperiment(pbmc_subset)
pbmc.markers.all <- Seurat::FindAllMarkers(pbmc_subset,
only.pos = TRUE,
min.pct = 0.25,
logfc.threshold = 0.25)
# get top 10 genes
pbmc.markers <- pbmc.markers.all |>
dplyr::group_by(cluster) |>
dplyr::top_n(n = 20, wt = avg_log2FC)
st.data <- prepareDataFromscRNA(object = sce,
diffData = pbmc.markers[,c("cluster","gene")],
showAverage = TRUE)
visCluster(object = st.data,
plotType = "line")
document()
check()
