corres_four <- optimal_df[optimal_df$codon %in% corres_four_codon,]
optimal_four_codon_nr <- optimal_four$high_No_codon
corres_four_codon_nr <- corres_four$high_No_codon
corres_six_3rd <- chartr("GCAT","ATGC",substr(optimal_six$codon,3,3))
corres_six_codon <- paste(substr(optimal_six$codon,1,2),corres_six_3rd, sep='')
corres_six <- optimal_df[optimal_df$codon %in% corres_six_codon,]
optimal_six_codon_nr <- optimal_six$high_No_codon
corres_six_codon_nr <- corres_six$high_No_codon
optimal_46_codon_nr <- c(optimal_four_codon_nr, optimal_six_codon_nr)
corres_46_codon_nr <- c(corres_four_codon_nr, corres_six_codon_nr)
total_codon46 <- sum(optimal_46_codon_nr,corres_46_codon_nr)
total_codon4 <- sum(optimal_four_codon_nr,corres_four_codon_nr)
total_codon6 <- sum(optimal_six_codon_nr,corres_six_codon_nr)
p46 <- (optimal_46_codon_nr/(optimal_46_codon_nr+corres_46_codon_nr))
p4 <- (optimal_four_codon_nr/(optimal_four_codon_nr+corres_four_codon_nr))
p6 <- (optimal_six_codon_nr/(optimal_six_codon_nr+corres_six_codon_nr))
k <- (1-genomic_gc3)/genomic_gc3
sscu46_values <- vector()
sscu4_values <- vector()
sscu6_values <- vector()
for(i in 1:length(p46)){
sscu46_values[i] <- log(p46[i] * k / (1-p46[i]))
}
for(i in 1:length(p4)){
sscu4_values[i] <- log(p4[i] * k / (1-p4[i]))
}
for(i in 1:length(p6)){
sscu6_values[i] <- log(p6[i] * k / (1-p6[i]))
}
sscu46 <- sum((sscu46_values*(optimal_46_codon_nr+corres_46_codon_nr)))/total_codon46
sscu4 <- sum((sscu4_values*(optimal_four_codon_nr+corres_four_codon_nr)))/total_codon4
sscu6 <- sum((sscu6_values*(optimal_six_codon_nr+corres_six_codon_nr)))/total_codon6
optimal_four$sscu <- sscu4_values
optimal_six$sscu <- sscu6_values
sscu46_sscu_ratio <- sscu46/sscu_index
#result <- list(sscu46=sscu46,sscu4=sscu4,sscu6=sscu6,optimal_four=optimal_four,corres_four=corres_four,optimal_six=optimal_six,corres_six=corres_six)
result <- list(optimal_mutation_index=sscu46,s_index=sscu_index,ratio=sscu46_sscu_ratio)
return(result)
}
else{
return(NA)
#stop("at least one c-ending optimal codon must in the four codon box
#and six codon box")
}
#}
}
# Run optimal_mutation_index on the Bbif highly-expressed set against the
# filtered Bbif genomic CDS set.
optimal_mutation_index(high_cds_file="/home/yu/Data/codon_usage/bee_endosymbionts/sharp_40_highly_dataset/Bbif.ffn",genomic_cds_file="/home/yu/Data/codon_usage/bee_endosymbionts/cds_filtered/Bbif.ffn")
# NOTE(review): biocLite() is called here before the installer script is
# sourced on the following line; presumably it was already defined earlier in
# the session -- confirm.
biocLite()
source("https://bioconductor.org/biocLite.R")
# Detach and unload attached packages one at a time. The list includes
# default packages such as stats, utils, methods and graphics.
detach("package:ade4", unload=TRUE)
detach("package:BiocGenerics", unload=TRUE)
detach("package:BiocInstaller", unload=TRUE)
detach("package:Biostrings", unload=TRUE)
detach("package:IRanges", unload=TRUE)
detach("package:S4Vectors", unload=TRUE)
detach("package:seqinr", unload=TRUE)
detach("package:sscu", unload=TRUE)
detach("package:XVector", unload=TRUE)
detach("package:datasets", unload=TRUE)
detach("package:graphics", unload=TRUE)
detach("package:grDevices", unload=TRUE)
detach("package:methods", unload=TRUE)
detach("package:parallel", unload=TRUE)
detach("package:stats", unload=TRUE)
detach("package:stats4", unload=TRUE)
detach("package:utils", unload=TRUE)
# Repeated attempts to run the Bioconductor installer after the detach calls.
biocLite()
biocLite()
source("https://bioconductor.org/biocLite.R")
biocLite()
# Pooled 2x2 contingency test over a set of tables.
#
# contingency_file: path to a comma-separated file without header; each row
#   holds the four cells a, b, c, d of one 2x2 table (columns are named in
#   that order after reading).
#
# For every row the expectation and variance of cell `a` are computed from
# the row margins; the Z score pools observed-minus-expected `a` over all
# rows, and `p` is the one-sided upper-tail normal probability of Z.
#
# Returns a list with Z, p, odd_ratio (sum(a*d/n) / sum(b*c/n)),
# conserved_sites (sum of a and c), variable_sites (sum of b and d) and
# ratio (conserved_sites / variable_sites).
akashi_test <- function(contingency_file=NULL){
    if(is.null(contingency_file)){
        stop("contingency_file must be supplied", call.=FALSE)
    }
    contingency_data <- read.table(contingency_file,sep=',')
    colnames(contingency_data) <- c('a','b','c','d')
    # read.table yields integer columns for whole-number counts; the product
    # of four margins overflows integer arithmetic (giving NA) already for
    # margins of a few hundred, so all arithmetic is done in double.
    cell_a <- as.numeric(contingency_data$a)
    cell_b <- as.numeric(contingency_data$b)
    cell_c <- as.numeric(contingency_data$c)
    cell_d <- as.numeric(contingency_data$d)
    n <- cell_a + cell_b + cell_c + cell_d
    E_a <- (cell_a + cell_b)*(cell_a + cell_c)/n
    V_a <- (cell_a + cell_b)*(cell_a + cell_c)*
        (cell_b + cell_d)*(cell_c + cell_d)/(n^2*(n-1))
    Z <- (sum(cell_a) - sum(E_a))/sqrt(sum(V_a))
    # Upper tail computed directly: 1 - pnorm(Z) rounds to exactly 0 once Z
    # exceeds roughly 8.3.
    p <- pnorm(Z, lower.tail=FALSE)
    odd_ratio <- sum(cell_a*cell_d/n)/sum(cell_b*cell_c/n)
    conserved_sites <- sum(cell_a,cell_c)
    variable_sites <- sum(cell_b,cell_d)
    list(Z=Z,
         p=p,
         odd_ratio=odd_ratio,
         conserved_sites=conserved_sites,
         variable_sites=variable_sites,
         ratio=conserved_sites/variable_sites)
}
# NOTE(review): called with no argument, so contingency_file is NULL when it
# reaches read.table(); the next line repeats the call with a real file.
akashi_test()
akashi_test(contingency_file = "~/Data/codon_usage/bee_endosymbionts/akashi_test/test_for_sscu")
# Pooled 2x2 contingency test over a set of tables, reporting the four
# site classes separately.
#
# contingency_file: path to a comma-separated file without header; each row
#   holds the four cells a, b, c, d of one 2x2 table. As the result names
#   show, a = conserved optimal, b = variable optimal, c = conserved
#   non-optimal and d = variable non-optimal site counts.
#
# Returns a list with the pooled Z score, its one-sided upper-tail normal
# p-value, odd_ratio (sum(a*d/n) / sum(b*c/n)), the four column totals and
# con_var_ratio = (sum(a) + sum(c)) / (sum(b) + sum(d)).
akashi_test <- function(contingency_file=NULL){
    if(is.null(contingency_file)){
        stop("contingency_file must be supplied", call.=FALSE)
    }
    contingency_data <- read.table(contingency_file,sep=',')
    colnames(contingency_data) <- c('a','b','c','d')
    # Work in double precision: the four-margin product below overflows R's
    # integer type (yielding NA) for counts of only a few hundred per margin.
    con_opt <- as.numeric(contingency_data$a)
    var_opt <- as.numeric(contingency_data$b)
    con_non <- as.numeric(contingency_data$c)
    var_non <- as.numeric(contingency_data$d)
    n <- con_opt + var_opt + con_non + var_non
    E_a <- (con_opt + var_opt)*(con_opt + con_non)/n
    V_a <- (con_opt + var_opt)*(con_opt + con_non)*
        (var_opt + var_non)*(con_non + var_non)/(n^2*(n-1))
    Z <- (sum(con_opt) - sum(E_a))/sqrt(sum(V_a))
    # lower.tail=FALSE keeps precision for large Z, where 1 - pnorm(Z) is 0.
    p <- pnorm(Z, lower.tail=FALSE)
    odd_ratio <- sum(con_opt*var_non/n)/sum(var_opt*con_non/n)
    con_op <- sum(con_opt)
    con_nop <- sum(con_non)
    var_op <- sum(var_opt)
    var_nop <- sum(var_non)
    list(Z=Z,
         p=p,
         odd_ratio=odd_ratio,
         conserved_optimal_sites=con_op,
         conserved_non_optimal_sites=con_nop,
         variable_optimal_sites=var_op,
         variable_non_optimal_sites=var_nop,
         con_var_ratio=(con_op+con_nop)/(var_op+var_nop)
    )
}
# Re-run the redefined akashi_test on the same contingency file.
akashi_test(contingency_file = "~/Data/codon_usage/bee_endosymbionts/akashi_test/test_for_sscu")
# Identify optimal codons by correlating per-gene RSCU with Nc.
#
# genomic_cds_file:    FASTA file of coding sequences, read with read.fasta().
# correspondence_file: whitespace-separated table with a header and an `Nc`
#                      column, one row per sequence (assumed to be in the
#                      same order as the FASTA file -- TODO confirm).
#
# Genes are kept only if they have more than 50 codons and more than 9 of the
# 18 degenerate amino-acid families present. For each of the 64 codons the
# Spearman correlation between RSCU and Nc is computed over the kept genes.
# Within each amino-acid family, the codon with the lowest correlation among
# those passing the Bonferroni threshold 0.05/n (n = codons in the family) is
# reported.
#
# Returns a character vector of lower-case codon names (possibly empty).
correlative_test <- function(genomic_cds_file=NULL, correspondence_file=NULL){
    # Positions of each family's synonymous codons in the 64-codon order
    # given by `codon_names` below.
    aa_codons <- list(
        c(5:8),          #thr
        c(21:24),        #pro
        c(37:40),        #ala
        c(41:44),        #gly
        c(45:48),        #val
        c(10,12,53:56),  #ser
        c(9,11,25:28),   #arg
        c(29:32,61,63),  #leu
        c(13,14,16),     #ile
        c(1,3),          #lys
        c(2,4),          #asn
        c(17,19),        #gln
        c(18,20),        #his
        c(33,35),        #glu
        c(34,36),        #asp
        c(50,52),        #tyr
        c(58,60),        #cys
        c(62,64)         #phe
    )
    codon_names <- c("aaa","aac","aag","aat","aca","acc","acg","act",
                     "aga","agc","agg","agt","ata","atc","atg","att",
                     "caa","cac","cag","cat","cca","ccc","ccg","cct",
                     "cga","cgc","cgg","cgt","cta","ctc","ctg","ctt",
                     "gaa","gac","gag","gat","gca","gcc","gcg","gct",
                     "gga","ggc","ggg","ggt","gta","gtc","gtg","gtt",
                     "taa","tac","tag","tat","tca","tcc","tcg","tct",
                     "tga","tgc","tgg","tgt","tta","ttc","ttg","ttt")

    genomic_cds <- read.fasta(file = genomic_cds_file)
    num_seq <- length(genomic_cds)
    if(num_seq == 0){
        stop("no sequences found in genomic_cds_file", call.=FALSE)
    }

    # Preallocate instead of growing a data.frame/vector once per gene.
    rscu_rows <- vector("list", num_seq)
    logic_vector <- logical(num_seq)
    for(i in seq_len(num_seq)){
        rscu_rows[[i]] <- as.vector(uco(genomic_cds[[i]], index="rscu"))
        codons <- uco(genomic_cds[[i]], index="eff")
        codon_for_aa <- vapply(aa_codons,
                               function(idx) sum(codons[idx]),
                               numeric(1))
        logic_vector[i] <- sum(codons) > 50 && sum(codon_for_aa > 0) > 9
    }
    rscu_mat <- do.call(rbind, rscu_rows)

    nc_df <- read.table(file = correspondence_file, header=TRUE)
    # as.character() first so a factor column is converted by label.
    nc_values <- as.numeric(as.character(nc_df$Nc[logic_vector]))

    correlations_nc <- numeric(64)
    correlations_nc_p <- numeric(64)
    for(j in seq_len(64)){
        cortest_nc <- cor.test(rscu_mat[logic_vector, j], nc_values,
                               method="spearman", exact=FALSE)
        correlations_nc[j] <- cortest_nc$estimate
        correlations_nc_p[j] <- cortest_nc$p.value
    }
    names(correlations_nc) <- codon_names

    optimal_codon_chr_nc <- character(0)
    for(family in aa_codons){
        # Bonferroni correction: divide by the number of codons in the
        # family (the previous code divided by the sum of the p-values).
        # which() also drops codons whose p-value is NA.
        significant <- which(correlations_nc_p[family] < 0.05/length(family))
        if(length(significant) == 0){
            next
        }
        rho <- correlations_nc[family][significant]
        optimal_codon_chr_nc <- c(optimal_codon_chr_nc,
                                  names(rho)[which.min(rho)])
    }
    optimal_codon_chr_nc
}
# Identify optimal codons from the correlation between per-gene RSCU and the
# Nc values in a correspondence-analysis output table.
#
# genomic_cds_file:    FASTA file of coding sequences, read with read.fasta().
# correspondence_file: whitespace-separated table with a header and an `Nc`
#                      column, one row per sequence (assumed to follow the
#                      FASTA order -- TODO confirm).
#
# Only genes with more than 50 codons and more than 9 of the 18 degenerate
# amino-acid families present take part in the correlations. For each family
# the codon with the lowest Spearman correlation among those with
# p < 0.05/n (n = number of codons in the family) is selected.
#
# Returns a character vector of lower-case codon names (possibly empty).
correlative_test_codonw <- function(genomic_cds_file=NULL, correspondence_file=NULL){
    codon_names <- c("aaa","aac","aag","aat","aca","acc","acg","act",
                     "aga","agc","agg","agt","ata","atc","atg","att",
                     "caa","cac","cag","cat","cca","ccc","ccg","cct",
                     "cga","cgc","cgg","cgt","cta","ctc","ctg","ctt",
                     "gaa","gac","gag","gat","gca","gcc","gcg","gct",
                     "gga","ggc","ggg","ggt","gta","gtc","gtg","gtt",
                     "taa","tac","tag","tat","tca","tcc","tcg","tct",
                     "tga","tgc","tgg","tgt","tta","ttc","ttg","ttt")
    # Indices into `codon_names` for every degenerate amino-acid family.
    aa_codons <- list(
        c(5:8),          #thr
        c(21:24),        #pro
        c(37:40),        #ala
        c(41:44),        #gly
        c(45:48),        #val
        c(10,12,53:56),  #ser
        c(9,11,25:28),   #arg
        c(29:32,61,63),  #leu
        c(13,14,16),     #ile
        c(1,3),          #lys
        c(2,4),          #asn
        c(17,19),        #gln
        c(18,20),        #his
        c(33,35),        #glu
        c(34,36),        #asp
        c(50,52),        #tyr
        c(58,60),        #cys
        c(62,64)         #phe
    )

    genomic_cds <- read.fasta(file = genomic_cds_file)
    num_seq <- length(genomic_cds)
    if(num_seq == 0){
        stop("no sequences found in genomic_cds_file", call.=FALSE)
    }

    # One RSCU row and one keep/drop flag per gene, preallocated.
    rscu_rows <- vector("list", num_seq)
    keep_gene <- logical(num_seq)
    for(i in seq_len(num_seq)){
        rscu_rows[[i]] <- as.vector(uco(genomic_cds[[i]], index="rscu"))
        codon_counts <- uco(genomic_cds[[i]], index="eff")
        family_totals <- vapply(aa_codons,
                                function(idx) sum(codon_counts[idx]),
                                numeric(1))
        keep_gene[i] <- sum(codon_counts) > 50 && sum(family_totals > 0) > 9
    }
    rscu_mat <- do.call(rbind, rscu_rows)

    nc_df <- read.table(file = correspondence_file, header=TRUE)
    # Convert via character so factor columns are parsed by label.
    nc_values <- as.numeric(as.character(nc_df$Nc[keep_gene]))

    rho <- numeric(64)
    rho_p <- numeric(64)
    for(j in seq_len(64)){
        cortest_nc <- cor.test(rscu_mat[keep_gene, j], nc_values,
                               method="spearman", exact=FALSE)
        rho[j] <- cortest_nc$estimate
        rho_p[j] <- cortest_nc$p.value
    }
    names(rho) <- codon_names

    optimal_codon_chr_nc <- character(0)
    for(family in aa_codons){
        # Bonferroni threshold uses the family size; the earlier code divided
        # 0.05 by the sum of the family's p-values. NA p-values are ignored.
        passing <- which(rho_p[family] < 0.05/length(family))
        if(length(passing) == 0){
            next
        }
        family_rho <- rho[family][passing]
        optimal_codon_chr_nc <- c(optimal_codon_chr_nc,
                                  names(family_rho)[which.min(family_rho)])
    }
    optimal_codon_chr_nc
}
# First attempt on the Gvag data, made before the library() calls below.
correlative_test_codonw(genomic_cds_file = '/home/yu/Data/codon_usage/bee_endosymbionts/cds_filtered/Gvag.ffn', correspondence_file = '/home/yu/Data/codon_usage/bee_endosymbionts/correspondence_analysis/Gvag/Gvag.out')
# Attach Biostrings and seqinr from the user library, then repeat the call.
library("Biostrings", lib.loc="~/R/x86_64-suse-linux-gnu-library/3.3")
library("seqinr", lib.loc="~/R/x86_64-suse-linux-gnu-library/3.3")
correlative_test_codonw(genomic_cds_file = '/home/yu/Data/codon_usage/bee_endosymbionts/cds_filtered/Gvag.ffn', correspondence_file = '/home/yu/Data/codon_usage/bee_endosymbionts/correspondence_analysis/Gvag/Gvag.out')
# Identify optimal codons by correlating per-gene RSCU with Nc or Ncp.
#
# genomic_cds_file: FASTA file of coding sequences, read with read.fasta().
# nc_file:          whitespace-separated table with a header and `Nc` and
#                   `Ncp` columns; its last row is discarded before use
#                   (presumably a summary row -- TODO confirm).
# index:            "nc" (default, the value this function has always
#                   returned) or "ncp" to base the selection on the Ncp
#                   column instead. The Ncp result used to sit behind an
#                   unreachable second return().
#
# Genes need more than 50 codons and more than 9 of the 18 degenerate
# amino-acid families present to take part. Within each family the codon
# with the lowest Spearman correlation among those with p < 0.05/n
# (n = number of codons in the family) is selected.
#
# Returns a character vector of lower-case codon names (possibly empty).
correlative_test <- function(genomic_cds_file=NULL, nc_file=NULL,
                             index=c("nc","ncp")){
    index <- match.arg(index)

    # Positions of each family's codons in `codon_names`. This list was
    # previously bound to `a_codons` while every use read `aa_codons`.
    aa_codons <- list(
        c(5:8),          #thr
        c(21:24),        #pro
        c(37:40),        #ala
        c(41:44),        #gly
        c(45:48),        #val
        c(10,12,53:56),  #ser
        c(9,11,25:28),   #arg
        c(29:32,61,63),  #leu
        c(13,14,16),     #ile
        c(1,3),          #lys
        c(2,4),          #asn
        c(17,19),        #gln
        c(18,20),        #his
        c(33,35),        #glu
        c(34,36),        #asp
        c(50,52),        #tyr
        c(58,60),        #cys
        c(62,64)         #phe
    )
    codon_names <- c("aaa","aac","aag","aat","aca","acc","acg","act",
                     "aga","agc","agg","agt","ata","atc","atg","att",
                     "caa","cac","cag","cat","cca","ccc","ccg","cct",
                     "cga","cgc","cgg","cgt","cta","ctc","ctg","ctt",
                     "gaa","gac","gag","gat","gca","gcc","gcg","gct",
                     "gga","ggc","ggg","ggt","gta","gtc","gtg","gtt",
                     "taa","tac","tag","tat","tca","tcc","tcg","tct",
                     "tga","tgc","tgg","tgt","tta","ttc","ttg","ttt")

    genomic_cds <- read.fasta(file = genomic_cds_file)
    num_seq <- length(genomic_cds)
    if(num_seq == 0){
        stop("no sequences found in genomic_cds_file", call.=FALSE)
    }

    rscu_rows <- vector("list", num_seq)
    logic_vector <- logical(num_seq)
    for(i in seq_len(num_seq)){
        rscu_rows[[i]] <- as.vector(uco(genomic_cds[[i]], index="rscu"))
        codons <- uco(genomic_cds[[i]], index="eff")
        codon_for_aa <- vapply(aa_codons,
                               function(idx) sum(codons[idx]),
                               numeric(1))
        logic_vector[i] <- sum(codons) > 50 && sum(codon_for_aa > 0) > 9
    }
    rscu_mat <- do.call(rbind, rscu_rows)

    nc_df <- read.table(file = nc_file, header=TRUE)
    nc_df <- nc_df[-nrow(nc_df),]
    measure <- switch(index, nc = nc_df$Nc, ncp = nc_df$Ncp)
    measure <- measure[logic_vector]

    correlations <- numeric(64)
    correlations_p <- numeric(64)
    for(j in seq_len(64)){
        cortest <- cor.test(rscu_mat[logic_vector, j], measure,
                            method="spearman", exact=FALSE)
        correlations[j] <- cortest$estimate
        correlations_p[j] <- cortest$p.value
    }
    names(correlations) <- codon_names

    optimal_codon_chr <- character(0)
    for(family in aa_codons){
        # Bonferroni threshold over the family size (was 0.05 divided by the
        # sum of the p-values); which() skips NA p-values.
        significant <- which(correlations_p[family] < 0.05/length(family))
        if(length(significant) == 0){
            next
        }
        rho <- correlations[family][significant]
        optimal_codon_chr <- c(optimal_codon_chr, names(rho)[which.min(rho)])
    }
    optimal_codon_chr
}
# Run the CodonW-style correlative test on LbDelBA1, using the NCprime
# output file del.out as the correspondence table.
correlative_test_codonw(genomic_cds_file = '/home/yu/Data/codon_usage/bee_endosymbionts/cds_filtered/LbDelBA1.ffn', correspondence_file = '/home/yu/Data/codon_usage/bee_endosymbionts/NCprime/del.out')
# Compare codon usage of a highly expressed gene set against a reference set.
#
# high_cds_file: FASTA file of the highly expressed coding sequences.
# ref_cds_file:  FASTA file of the reference coding sequences.
# p_cutoff:      significance cut-off applied to the per-codon chi-squared
#                p-value (default 0.05).
#
# For each file all sequences are concatenated, in-frame codon counts are
# taken, and the expected count of a codon is the count of its amino acid
# divided by the number of codons encoding that amino acid. RSCU is
# observed/expected. Each codon gets a simulated-p-value chi-squared test on
# the 2x2 matrix (observed, expected) x (high, reference), so p-values vary
# between runs unless the RNG seed is fixed.
#
# Returns a data.frame, one row per codon sorted in TTT..GGG order, with
# columns codon, aa, rscu_high, rscu_ref, high_No_codon,
# high_expect_No_codon, ref_No_codon, ref_expect_No_codon, p_value and
# symbol ('+' significantly higher RSCU in the high set, '-' significant but
# not higher, 'NA' otherwise).
optimal_codons <- function(high_cds_file=NULL,ref_cds_file=NULL,p_cutoff=0.05){
    # Always take the table from the Biostrings namespace; it was previously
    # also read unqualified, which needs the package to be attached.
    genetic_code <- Biostrings::GENETIC_CODE
    # Number of codons per amino acid, computed once instead of per codon.
    codons_per_aa <- table(genetic_code)

    calc_frequency <- function(cds_file=NULL){
        cds <- Biostrings::readDNAStringSet(cds_file)
        all_cds <- unlist(cds)
        nuc_freq <- Biostrings::trinucleotideFrequency(all_cds,step=3)
        aa_freq <- Biostrings::alphabetFrequency(
            Biostrings::translate(all_cds,if.fuzzy.codon="X")
        )
        aa <- unname(genetic_code[names(nuc_freq)])
        nuc_exp <- as.vector(aa_freq[aa])/as.vector(codons_per_aa[aa])
        names(nuc_exp) <- names(nuc_freq)
        list(nuc_freq=nuc_freq,nuc_exp=nuc_exp)
    }

    reference_result <- calc_frequency(cds_file=ref_cds_file)
    high_result <- calc_frequency(cds_file=high_cds_file)

    codon <- names(high_result$nuc_freq)
    aa_for_codon <- unname(genetic_code[codon])
    rscu_high <- unname(high_result$nuc_freq/high_result$nuc_exp)
    rscu_ref <- unname(reference_result$nuc_freq/reference_result$nuc_exp)

    p <- vapply(seq_along(codon), function(i){
        x <- matrix(c(high_result$nuc_freq[i],high_result$nuc_exp[i],
                      reference_result$nuc_freq[i],
                      reference_result$nuc_exp[i]),ncol=2)
        stats::chisq.test(x,simulate.p.value = TRUE)$p.value
    }, numeric(1))

    # Codons with an undefined RSCU or p-value stay 'NA' instead of aborting
    # the whole run inside if().
    symbol <- rep('NA', length(codon))
    significant <- !is.na(rscu_high) & !is.na(rscu_ref) &
        !is.na(p) & p < p_cutoff
    higher <- significant & rscu_high > rscu_ref
    symbol[higher] <- '+'
    symbol[significant & !higher] <- '-'

    out_list <- data.frame(codon=codon,
                           aa=aa_for_codon,
                           rscu_high=round(rscu_high,digits=2),
                           rscu_ref=round(rscu_ref,digits=2),
                           high_No_codon=high_result[[1]],
                           high_expect_No_codon=round(high_result[[2]],digits=0),
                           ref_No_codon=reference_result[[1]],
                           ref_expect_No_codon=round(reference_result[[2]],digits=0),
                           p_value=round(p,digits=3),
                           symbol=symbol)
    codon_level <- c("TTT","TTC","TTA","TTG","TCT","TCC","TCA","TCG",
                     "TAT","TAC","TAA","TAG","TGT","TGC","TGA","TGG",
                     "CTT","CTC","CTA","CTG","CCT","CCC","CCA","CCG",
                     "CAT","CAC","CAA","CAG","CGT","CGC","CGA","CGG",
                     "ATT","ATC","ATA","ATG","ACT","ACC","ACA","ACG",
                     "AAT","AAC","AAA","AAG","AGT","AGC","AGA","AGG",
                     "GTT","GTC","GTA","GTG","GCT","GCC","GCA","GCG",
                     "GAT","GAC","GAA","GAG","GGT","GGC","GGA","GGG")
    out_list$codon <- factor(out_list$codon, levels = codon_level)
    out_list[order(out_list$codon),]
}
# Run optimal_codons on the Bin2 data, on the example sequences shipped with
# the sscu package (using the default p_cutoff), and on the Bin4 data.
optimal_codons(high_cds_file = "/home/yu/Data/codon_usage/bee_endosymbionts/sharp_40_highly_dataset/Bin2.ffn",ref_cds_file = "/home/yu/Data/codon_usage/bee_endosymbionts/cds_filtered/Bin2.ffn",p_cutoff = 0.05)
optimal_codons(high_cds_file=system.file("sequences/L_kunkeei_highly.ffn",package="sscu"),ref_cds_file=system.file("sequences/L_kunkeei_genome_cds.ffn",package="sscu"))
optimal_codons(high_cds_file = "/home/yu/Data/codon_usage/bee_endosymbionts/sharp_40_highly_dataset/Bin4.ffn",ref_cds_file = "/home/yu/Data/codon_usage/bee_endosymbionts/cds_filtered/Bin4.ffn",p_cutoff = 0.05)
# List the codons that are significantly over-used in a highly expressed
# gene set relative to a reference set.
#
# high_cds_file: FASTA file of the highly expressed coding sequences.
# ref_cds_file:  FASTA file of the reference coding sequences.
# p_cutoff:      significance cut-off applied to the per-codon chi-squared
#                p-value (default 0.05).
#
# For each file all sequences are concatenated, in-frame codon counts are
# taken, and a codon's expected count is its amino acid's count divided by
# the number of codons encoding that amino acid; RSCU is observed/expected.
# A codon is reported when its RSCU is higher in the high set than in the
# reference set and the simulated-p-value chi-squared test on the 2x2 matrix
# (observed, expected) x (high, reference) gives p < p_cutoff. The simulated
# p-values vary between runs unless the RNG seed is fixed.
#
# Returns a character vector of upper-case codons (possibly empty), in the
# order returned by Biostrings::trinucleotideFrequency().
op_highly <- function(high_cds_file=NULL,ref_cds_file=NULL,p_cutoff=0.05){
    # Read the code table from the Biostrings namespace throughout; part of
    # the earlier code read it unqualified, which needs the package attached.
    genetic_code <- Biostrings::GENETIC_CODE
    # Codons per amino acid, computed once rather than inside the codon loop.
    codons_per_aa <- table(genetic_code)

    calc_frequency <- function(cds_file=NULL){
        cds <- Biostrings::readDNAStringSet(cds_file)
        all_cds <- unlist(cds)
        nuc_freq <- Biostrings::trinucleotideFrequency(all_cds,step=3)
        aa_freq <- Biostrings::alphabetFrequency(
            Biostrings::translate(all_cds,if.fuzzy.codon="X")
        )
        aa <- unname(genetic_code[names(nuc_freq)])
        nuc_exp <- as.vector(aa_freq[aa])/as.vector(codons_per_aa[aa])
        names(nuc_exp) <- names(nuc_freq)
        list(nuc_freq=nuc_freq,nuc_exp=nuc_exp)
    }

    reference_result <- calc_frequency(cds_file=ref_cds_file)
    high_result <- calc_frequency(cds_file=high_cds_file)

    codon <- names(high_result$nuc_freq)
    rscu_high <- unname(high_result$nuc_freq/high_result$nuc_exp)
    rscu_ref <- unname(reference_result$nuc_freq/reference_result$nuc_exp)

    p <- vapply(seq_along(codon), function(i){
        x <- matrix(c(high_result$nuc_freq[i],high_result$nuc_exp[i],
                      reference_result$nuc_freq[i],
                      reference_result$nuc_exp[i]),ncol=2)
        stats::chisq.test(x,simulate.p.value = TRUE)$p.value
    }, numeric(1))

    # Undefined RSCU or p-values simply exclude the codon rather than making
    # if() fail on NA.
    comparable <- !is.na(rscu_high) & !is.na(rscu_ref) & !is.na(p)
    codon[comparable & rscu_high > rscu_ref & p < p_cutoff]
}
# Run op_highly on the Bin4 and Gvag data sets.
op_highly(high_cds_file = "/home/yu/Data/codon_usage/bee_endosymbionts/sharp_40_highly_dataset/Bin4.ffn",ref_cds_file = "/home/yu/Data/codon_usage/bee_endosymbionts/cds_filtered/Bin4.ffn",p_cutoff = 0.05)
op_highly(high_cds_file = "/home/yu/Data/codon_usage/bee_endosymbionts/sharp_40_highly_dataset/Gvag.ffn",ref_cds_file = "/home/yu/Data/codon_usage/bee_endosymbionts/cds_filtered/Gvag.ffn",p_cutoff = 0.05)
# NOTE(review): called with no arguments, so both file arguments are NULL.
optimal_codons()
View(optimal_codons)
# NOTE(review): `p_highly` is not defined in the visible code; the following
# line repeats the same call as op_highly, so this looks like a typo.
op <- p_highly(high_cds_file = "/home/yu/Data/codon_usage/bee_endosymbionts/sharp_40_highly_dataset/Gvag.ffn",ref_cds_file = "/home/yu/Data/codon_usage/bee_endosymbionts/cds_filtered/Gvag.ffn",p_cutoff = 0.05)
op <- op_highly(high_cds_file = "/home/yu/Data/codon_usage/bee_endosymbionts/sharp_40_highly_dataset/Gvag.ffn",ref_cds_file = "/home/yu/Data/codon_usage/bee_endosymbionts/cds_filtered/Gvag.ffn",p_cutoff = 0.05)
# Inspect the returned value and its structure.
op
str(op)
# rmarkdown installation, issued twice.
install.packages("rmarkdown")
install.packages("rmarkdown")
# Load the proportion_index definitions from the local sscu source tree.
source('~/data/sscu/R/proportion_index.R')
