caseStudy3-EnvComplexity-phyloseq
miaSim package authors
2023-05-29
Source:vignettes/phyloseq/caseStudy3-EnvironmentalComplexity.Rmd
caseStudy3-EnvironmentalComplexity.RmdIntroduction
This document provides the detailed code required to replicate case study 3 discussed in Gao et al. (2023). Methods in Ecology and Evolution. DOI: 10.1111/2041-210X.14129
This replication study has been implemented with phyloseq. The TreeSE package has been since then upgraded to use the TreeSE data container.
For general instructions and examples on using the miaSim package tools, see the vignette. miaSim implements tools for microbiome data simulation based on different ecological modeling assumptions. These can be used to simulate species abundance matrices, including time series. For a detailed function documentation, see the function reference page
Case study 3: Environmental complexity impacts on different communities
(Reference: Non-additive microbial community responses to environmental complexity)
The aim of this case study is to recalculate the patterns shown in the existing publication in the above reference. More specifically, the left part of this Figure 2. 
In this case study, consumer-resource model was used. The number of species in 3 distinct communities varied so that communities are named after the number of species (i.e. com13, com3, and com4).
For each community, a floor of different numbers of carbon resources (nutrients) is set from 1, 2, 4, 8, 16, to 32.
In the original experiment, value of OD600 is selected as y-axis to represent the growth yield. In our simulations, however, the total number of organisms(number of all individuals) is used to reflect the growth yield.
Setup
Load dependencies
Set random seed
set.seed(42)Set number of repeats
n_rep <- 50Initial the output dataframes to store data
result_df <- data.frame(
n_species = integer(),
theta = numeric(),
i = integer(),
n_resources = integer(),
value = numeric()
)
result_df2 <- data.frame(
matrix(NA, nrow = 0, ncol = 13, dimnames = list(NULL, paste0("sp", seq_len(13))))
)
sorensen_df <- data.frame(
n_species = integer(),
theta = numeric(),
rho_mean = numeric(),
rho_sd = numeric()
)Generating function. This function generates a data frame, where each row is arranged in an increasing dissimilarity to the first row.
gradient_df_generator <- function(n_row, n_col, density_row, max_gradient, error_interval){
list_initial <- list()
dissimilarity.gradient <- seq(from = 0, to = max_gradient, length.out = n_row)
for (i in seq_len(n_row)){
print(i)
if (i == 1){
row_temp <- rbeta(n_col, 1, n_col)
col_to_remove <- sample(x = seq_len(n_col), size = n_col-n_col*density_row)
row_temp[col_to_remove] <- 0
list_initial[[i]] <- row_temp
} else {
while (length(list_initial) < i) {
row_temp <- rbeta(n_col, 1, n_col)
col_to_remove <- sample(x = seq_len(n_col), size = n_col-n_col*density_row)
row_temp[col_to_remove] <- 0
diff_temp <- abs(vegdist(rbind(list_initial[[1]], row_temp), method = "bray") - dissimilarity.gradient[i])
if (diff_temp < error_interval) {
list_initial[[i]] <- row_temp
}
}
}
}
dataframe_to_return <- as.data.frame(t(matrix(unlist(list_initial), ncol = n_row)))
return(dataframe_to_return)
}Load parameters used by Pacheco et al., and initialized the community data frame. Note that in this step, the value range of theta has been extended.
n_species_types <- c(13, 3, 4)
theta_types <- c(1, 0.75, 0.5, 0.25, 0.1, 0.05)
n_resources_types <- c(1,2,4,8,16,32)
community.initial.df <- as.list(
lapply(n_species_types,
gradient_df_generator,
n_row = n_rep,
density_row = 1,
max_gradient = 0.7,
error_interval = 0.15)
)## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15
## [1] 16
## [1] 17
## [1] 18
## [1] 19
## [1] 20
## [1] 21
## [1] 22
## [1] 23
## [1] 24
## [1] 25
## [1] 26
## [1] 27
## [1] 28
## [1] 29
## [1] 30
## [1] 31
## [1] 32
## [1] 33
## [1] 34
## [1] 35
## [1] 36
## [1] 37
## [1] 38
## [1] 39
## [1] 40
## [1] 41
## [1] 42
## [1] 43
## [1] 44
## [1] 45
## [1] 46
## [1] 47
## [1] 48
## [1] 49
## [1] 50
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15
## [1] 16
## [1] 17
## [1] 18
## [1] 19
## [1] 20
## [1] 21
## [1] 22
## [1] 23
## [1] 24
## [1] 25
## [1] 26
## [1] 27
## [1] 28
## [1] 29
## [1] 30
## [1] 31
## [1] 32
## [1] 33
## [1] 34
## [1] 35
## [1] 36
## [1] 37
## [1] 38
## [1] 39
## [1] 40
## [1] 41
## [1] 42
## [1] 43
## [1] 44
## [1] 45
## [1] 46
## [1] 47
## [1] 48
## [1] 49
## [1] 50
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15
## [1] 16
## [1] 17
## [1] 18
## [1] 19
## [1] 20
## [1] 21
## [1] 22
## [1] 23
## [1] 24
## [1] 25
## [1] 26
## [1] 27
## [1] 28
## [1] 29
## [1] 30
## [1] 31
## [1] 32
## [1] 33
## [1] 34
## [1] 35
## [1] 36
## [1] 37
## [1] 38
## [1] 39
## [1] 40
## [1] 41
## [1] 42
## [1] 43
## [1] 44
## [1] 45
## [1] 46
## [1] 47
## [1] 48
## [1] 49
## [1] 50
Loop for different combinations of (number of species X theta X number of repetations X number of resources)
for (n_species in n_species_types){
for (theta in theta_types) {
sorensen <- c()
for (i in seq_len(n_rep)){
for (n_resources in n_resources_types) {
### generate E ####
Etest <- randomE(n_species = n_species,
n_resources = n_resources,
mean_consumption = theta*n_resources,
exact = TRUE)
### calculate rho ####
if (n_resources == max(n_resources_types)){
Etest_pos <- Etest
Etest_pos[Etest_pos<0] <- 0
for (j in seq_len(n_species - 1)){
for (k in 2:n_species){
sorensen <- c(sorensen,
sum(apply(Etest_pos[c(j,k),], 2, min)))
}
}
}
if (n_resources > 1){
Priority <- t(apply(matrix(sample(n_species * n_resources), nrow = n_species), 1, order))
Priority <- (Etest > 0) * Priority
} else {
Priority <- NULL
}
print(paste0("n_species=",n_species, " theta=",theta, " i=", i, " n_resources=", n_resources))
x0temp <- as.numeric(community.initial.df[[match(n_species, n_species_types)]][i,])
x0temp <- 10*x0temp/sum(x0temp)
CRMtest <- simulateConsumerResource(n_species = n_species,
n_resources = n_resources,
x0 = x0temp, #rep(10, n_species),
resources = rep(100, n_resources),
E = Etest,
# trophic_priority = Priority,
stochastic = TRUE,
t_end = 1000,
t_step = 1,
t_store = 1000)
CRMspecies <- getCommunity(CRMtest)
CRMspeciesTotal <- sum(CRMspecies)
result_df[nrow(result_df)+1,] <- c(n_species, theta, i, n_resources, CRMspeciesTotal)
result_df2[nrow(result_df2)+1,] <- c(CRMspecies, rep(NA, 13-length(CRMspecies)))
# makePlotRes(CRMtest$resources)
# makePlot(CRMtest$matrix)
}
}
rho_mean <- mean(sorensen)
rho_sd <- var(sorensen)
sorensen_df[nrow(sorensen_df)+1, ] <- c(n_species, theta, rho_mean, rho_sd)
}
}
p_fig2_result_df <- ggplot(result_df, aes(x = n_resources, y = value, group = n_resources)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(alpha = 0.2, width = 0.2) +
facet_grid(. ~ factor(n_species, levels = n_species_types)) +
theme_bw() +
scale_x_continuous(trans = "log2", breaks = n_resources_types) +
xlab("number of resources") +
ylab("growth yield (number of individuals)")
p_fig2_result_df