chipseqSamples<-read.csv("chipseq/SampleSheet.csv")
dbaobj<-dba(sampleSheet=chipseqSamples)
# Count reads 500bp either side of summits, a peak is considered a peak
# if it is found in 3 samples out of loaded samples.
dbaobj<-dba.count(dbaobj,minOverlap=3,summits=500)
# Define contrasts
dbaobj<-dba.contrast(dbaobj, categories=DBA_CONDITION)
# Calculate differential binding using the DESeq2 engine within DiffBind
dbaobj<-dba.analyze(dbaobj,method=DBA_DESEQ2)
# Save the DiffBind object
save(dbaobj,file=fname)
} else {
load(fname)
}
# MSigDB v5.0 gene sets (Broad Institute), shipped with the project as an .rda
# file. load() is expected to create `msigDBentrez` in this environment.
load("msigdb/MSigDB_v5.0_human.rda")
# Work with the complete C2 collection (one list element per pathway).
pathways <- msigDBentrez[["c2.all.v5.0.entrez.gmt"]]
length(pathways)
# Every gene that occurs in at least one pathway.
allgenes <- unique(unlist(pathways))
## Export one DiffBind report per contrast, read the reports back in, convert
## them to GRanges and annotate every peak with the genes it overlaps.
## Requires `dbaobj`, `lborder` and `rborder` to exist in the session.
contrasts <- c("contrast45vs00", "contrast90vs00", "contrast90vs45")
# Position of each contrast inside the DiffBind object.
# NOTE(review): 24/17/15 are copied from the original script -- verify them
# against the contrast listing of `dbaobj`.
contrast_index <- c(contrast45vs00 = 24, contrast90vs00 = 17, contrast90vs45 = 15)
# Report files are derived from the contrast names so that file i always
# belongs to contrast i. The original listed the files in the reverse order of
# the names later assigned to them, which mislabelled the data.
filelist <- setNames(
  paste0("results/Report_", contrasts, "_Unnormalized.csv"),
  contrasts
)
for (contrast in contrasts) {
  write.csv(
    dba.report(
      dbaobj,
      contrast = contrast_index[[contrast]], method = DBA_DESEQ2,
      bNormalized = FALSE, bCounts = TRUE, th = 1
    ),
    filelist[[contrast]]
  )
}
contrast.names <- contrasts
# Read the reports back. The original also looped over an undefined `input`
# object and removed only the last report with a stray unlink(i); both are
# dropped, so all report files are kept on disk.
raws <- lapply(filelist, read.delim, as.is = TRUE, sep = ",")
names(raws) <- contrast.names
# Transform into GRanges
grs <- lapply(raws, GRanges)
# Gene Annotation Object
annotation <- toGRanges(TxDb.Hsapiens.UCSC.hg19.knownGene, feature = "gene")
# Annotate: keep the first peak reported for each gene and index peaks by gene.
annos <- lapply(grs, function(gr) {
  anno <- annotatePeakInBatch(
    gr,
    AnnotationData = annotation, output = "overlapping",
    FeatureLocForDistance = "TSS", bindingRegion = c(lborder, rborder)
  )
  anno <- anno[!duplicated(anno$feature)]
  names(anno) <- anno$feature
  anno
})
# Bundle raw counts and the sample grouping into one analysis object.
#
# rawcounts: count data, stored unchanged as `$rawcounts`.
# samples:   sample grouping, stored unchanged as `$samples`.
# Returns a plain list with the two elements `rawcounts` and `samples`.
vulcan.import <- function(rawcounts, samples) {
  list(rawcounts = rawcounts, samples = samples)
}
# Build the signed gene-level signature of one contrast.
#
# vobj:     list-like object holding one annotated peak table per contrast;
#           each table must expose `$p.value`, `$Fold` and `$feature`.
# contrast: name or index of the contrast to extract (default: the first).
# Returns a named numeric vector: -log10(p) * sign(fold) for every annotated
# gene, followed by near-zero noise for all remaining genes in the global
# `allgenes`.
#
# The original multiplied the raw p-value by the sign, which placed the most
# significant peaks next to zero (indistinguishable from the padding noise)
# and the least significant ones at the extremes of the ranking.
vulcan.signature <- function(vobj, contrast = 1) {
  anno <- vobj[[contrast]]
  pvalues <- anno$p.value
  # A p-value of exactly 0 would give an infinite score; clamp it to the
  # smallest positive double instead.
  pvalues[!is.na(pvalues) & pvalues <= 0] <- .Machine$double.xmin
  # Fold change alone does not recapitulate the replicate agreement, so the
  # p-value carries the magnitude and the fold change only the direction.
  signature <- -log10(pvalues) * sign(anno$Fold)
  names(signature) <- anno$feature
  # Pad with tiny gaussian noise for genes without a peak: if no reference
  # gene reached the GSEA null function it would divide by zero.
  # The fixed seed keeps the padding reproducible (it resets the global RNG).
  set.seed(1)
  othergenes <- setdiff(allgenes, names(signature))
  gaussiannoise <- setNames(
    rnorm(length(othergenes), mean = 0, sd = 0.01), # very small
    othergenes
  )
  c(signature, gaussiannoise)
}
#### Normalize the raw counts of a vulcan object
#
# vobj: list with `$rawcounts` (count matrix, one column per sample) and
#       `$samples` (named list: condition name -> sample identifiers).
# Returns `vobj` with the variance-stabilized matrix added as `$normalized`
# and its class set to "vulcan".
#
# The original read `samples` and `rawcounts` without ever extracting them
# from `vobj`, so it only worked when globals of those names happened to exist.
vulcan.normalize <- function(vobj) {
  ### Extract raw counts and sample grouping from the object
  rawcounts <- vobj$rawcounts
  samples <- vobj$samples
  # One condition label per sample, in the order the samples are listed.
  # NOTE(review): assumes the columns of `rawcounts` follow that same order.
  conditions <- factor(rep(names(samples), lengths(samples)))
  stopifnot(length(conditions) == ncol(rawcounts))
  # Generate a normalized abundance object
  cds <- newCountDataSet(rawcounts, conditions)
  cds <- estimateSizeFactors(cds)
  cds <- estimateDispersions(cds)
  vsd <- varianceStabilizingTransformation(cds)
  vobj$normalized <- exprs(vsd)
  class(vobj) <- "vulcan"
  vobj
}
# Run msviper for every pairwise comparison of conditions.
#
# vobj:    vulcan object with `$samples` (named list of sample identifiers per
#          condition) and `$normalized` (matrix with one column per sample).
# regulon: named regulon object passed to msviper(); its names are the
#          regulators reported in the result matrix.
# Returns `vobj` with two additions:
#   $mrss     - list of annotated msviper results, one per comparison
#   $msvipers - matrix of NES values (regulators x comparisons)
#
# The result matrix was hard-coded to three columns, so anything other than
# exactly three conditions failed when the column names were assigned; it is
# now sized from the number of comparisons.
vulcan <- function(vobj, regulon) {
  tfs <- names(regulon)
  samples <- vobj$samples
  normalized <- vobj$normalized
  ### Combinations of contrasts, named "<second>_vs_<first>"
  combinations <- combn(names(samples), 2)
  combonames <- apply(combinations, 2, function(x) {
    paste(rev(x), collapse = "_vs_")
  })
  ### Prepare output objects
  msvipers <- matrix(
    NA,
    nrow = length(tfs), ncol = length(combonames),
    dimnames = list(tfs, combonames)
  )
  mrss <- list()
  ### Loop over combinations
  for (im in seq_along(combonames)) {
    a <- samples[[combinations[2, im]]]
    b <- samples[[combinations[1, im]]]
    name <- combonames[im]
    # Vulcan msviper implementation; the seed is reset before each comparison
    # so the permutation null is reproducible.
    set.seed(1)
    signature <- rowTtest(normalized[, a], normalized[, b])$statistic
    dnull <- ttestNull(normalized[, a], normalized[, b], per = 1000)
    mrs <- msviper(signature, regulon, dnull, minsize = 10)
    # Regulators that msviper did not score stay NA.
    msvipers[names(mrs$es$nes), im] <- mrs$es$nes
    # Annotate (uses the global Entrez -> symbol table)
    mrs <- msviperAnnot(mrs, list_eg2symbol)
    mrss[[name]] <- mrs
  }
  vobj$mrss <- mrss
  vobj$msvipers <- msvipers
  vobj
}
library(org.Hs.eg.db)
library(ChIPQC)
library(vulcan)
library(aracne.networks)
library(DESeq)
library(GenomicRanges)
library(DiffBind)
library(zoo)
library(gplots)
# remove.packages(c("DiffBind","ChIPQC"))
# packageurl<-"http://bioconductor.org/packages/3.1/bioc/src/contrib/DiffBind_1.14.6.tar.gz"
# install.packages(packageurl, repos=NULL, type="source")
# packageurl<-"https://bioconductor.org/packages/3.2/bioc/src/contrib/ChIPQC_1.6.1.tar.gz"
# install.packages(packageurl, repos=NULL, type="source")
# options(MulticoreParam=quote(MulticoreParam(8)))
# Entrez ID -> gene symbol lookup, restricted to the keys that have a mapping.
list_eg2symbol <- as.list(
  org.Hs.egSYMBOL[mappedkeys(org.Hs.egSYMBOL)]
)
# Convert Entrez gene IDs to gene symbols using the global `list_eg2symbol`.
#
# ids: vector of Entrez IDs (coerced to character).
# Returns a character vector named by `ids`. IDs without a mapping are
# returned as "unknown.<id>"; an ID that already carries the "unknown."
# prefix is stripped back to the bare ID rather than being prefixed twice.
#
# `list_eg2symbol` is a list, and is.na() never flags the NULL entries that
# list subsetting yields for missing keys, so unmapped IDs used to come back
# as the string "NULL" and the names were lost. Each hit is now reduced to a
# single string before the missing-value check.
e2s <- function(ids) {
  ids <- as.character(ids)
  hits <- list_eg2symbol[ids]
  symbols <- vapply(
    hits,
    function(hit) {
      if (length(hit) == 0L || is.na(hit[[1L]])) {
        NA_character_
      } else {
        as.character(hit[[1L]])
      }
    },
    character(1),
    USE.NAMES = FALSE
  )
  unmapped <- is.na(symbols)
  symbols[unmapped] <- paste0("unknown.", ids[unmapped])
  symbols <- gsub("unknown.unknown.", "", symbols)
  names(symbols) <- ids
  symbols
}
# Gene symbol -> Entrez ID lookup, restricted to the keys that have a mapping.
list_symbol2eg <- as.character(
  org.Hs.egSYMBOL2EG[mappedkeys(org.Hs.egSYMBOL2EG)]
)
# Convert gene symbols to Entrez IDs using the global `list_symbol2eg`.
#
# ids: vector of gene symbols (coerced to character).
# Returns the looked-up values named by `ids`; symbols without a mapping are
# returned as "unknown.<symbol>", and a doubled "unknown.unknown." prefix is
# removed altogether.
s2e <- function(ids) {
  keys <- as.character(ids)
  entrez <- list_symbol2eg[keys]
  names(entrez) <- keys
  unmapped <- is.na(entrez)
  entrez[unmapped] <- paste0("unknown.", keys[unmapped])
  gsub("unknown.unknown.", "", entrez)
}
# Differential binding analysis with DiffBind, cached on disk: the analysis is
# only run when no saved object exists yet.
fname <- "results/diffbind.rda"
if (file.exists(fname)) {
  # Re-use the object saved by a previous run (expected to restore `dbaobj`).
  load(fname)
} else {
  # Load a sample sheet
  chipseqSamples <- read.csv("chipseq/SampleSheet.csv")
  dbaobj <- dba(sampleSheet = chipseqSamples)
  # Count reads 500bp either side of summits; a peak is kept only if it is
  # found in at least 3 of the loaded samples.
  dbaobj <- dba.count(dbaobj, minOverlap = 3, summits = 500)
  # Define contrasts
  dbaobj <- dba.contrast(dbaobj, categories = DBA_CONDITION)
  # Calculate differential binding using the DESeq2 engine within DiffBind
  dbaobj <- dba.analyze(dbaobj, method = DBA_DESEQ2)
  # Save the DiffBind object
  save(dbaobj, file = fname)
}
# MSigDB v5.0 gene sets (Broad Institute), shipped with the project as an .rda
# file. load() is expected to create `msigDBentrez` in this environment.
load("msigdb/MSigDB_v5.0_human.rda")
# Work with the complete C2 collection (one list element per pathway).
pathways <- msigDBentrez[["c2.all.v5.0.entrez.gmt"]]
length(pathways)
# Every gene that occurs in at least one pathway.
allgenes <- unique(unlist(pathways))
## Export one DiffBind report per contrast, read the reports back in, convert
## them to GRanges and annotate every peak with the genes it overlaps.
## Requires `dbaobj`, `lborder` and `rborder` to exist in the session.
contrasts <- c("contrast45vs00", "contrast90vs00", "contrast90vs45")
# Position of each contrast inside the DiffBind object.
# NOTE(review): 24/17/15 are copied from the original script -- verify them
# against the contrast listing of `dbaobj`.
contrast_index <- c(contrast45vs00 = 24, contrast90vs00 = 17, contrast90vs45 = 15)
# Report files are derived from the contrast names so that file i always
# belongs to contrast i. The original listed the files in the reverse order of
# the names later assigned to them, which mislabelled the data.
filelist <- setNames(
  paste0("results/Report_", contrasts, "_Unnormalized.csv"),
  contrasts
)
for (contrast in contrasts) {
  write.csv(
    dba.report(
      dbaobj,
      contrast = contrast_index[[contrast]], method = DBA_DESEQ2,
      bNormalized = FALSE, bCounts = TRUE, th = 1
    ),
    filelist[[contrast]]
  )
}
contrast.names <- contrasts
# Read the reports back. The original removed only the last report with a
# stray unlink(i); that call is dropped, so all report files stay on disk.
raws <- lapply(filelist, read.delim, as.is = TRUE, sep = ",")
names(raws) <- contrast.names
# Transform into GRanges
grs <- lapply(raws, GRanges)
# Gene Annotation Object
annotation <- toGRanges(TxDb.Hsapiens.UCSC.hg19.knownGene, feature = "gene")
# Annotate: keep the first peak reported for each gene and index peaks by gene.
annos <- lapply(grs, function(gr) {
  anno <- annotatePeakInBatch(
    gr,
    AnnotationData = annotation, output = "overlapping",
    FeatureLocForDistance = "TSS", bindingRegion = c(lborder, rborder)
  )
  anno <- anno[!duplicated(anno$feature)]
  names(anno) <- anno$feature
  anno
})
# Pathway enrichment for every contrast stored in `annos`.
# For each contrast: build the signed peak signature, score all pathways with
# a quick GSEA pass (100 permutations), re-score the strongest 500 with 1000
# permutations, write a result table and plot the top pathway. Both scoring
# passes are cached as .rda files under results/.
# The loop originally iterated over names(vobj), but no `vobj` is created at
# top level in this script; the annotated peak tables live in `annos`.
for (contrast in names(annos)) {
  signature <- vulcan.signature(annos, contrast = contrast)

  ### Running GSEA
  # Enrichment of every pathway in the comparison, as NES
  # (Normalized Enrichment Score)
  filename <- paste0("results/nes.pathways_", contrast, ".rda")
  if (!file.exists(filename)) {
    nes.pathways <- setNames(rep(0, length(pathways)), names(pathways))
    pb <- txtProgressBar(0, length(pathways), style = 3)
    i <- 0
    for (pname in names(pathways)) {
      obj <- gsea(reflist = signature, set = pathways[[pname]], method = "pareto", np = 100)
      nes.pathways[pname] <- obj$nes
      setTxtProgressBar(pb, i <- i + 1)
    }
    close(pb)
    save(nes.pathways, file = filename)
  } else {
    load(filename)
  }

  ## Recompute the top 500 with more permutations.
  # head() rather than [1:500]: with fewer than 500 pathways the index form
  # pads the selection with NA names.
  tpathways <- names(head(sort(abs(nes.pathways), decreasing = TRUE), 500))

  ## Relationship between pathway size and NES
  ## Highlight the top selected for further permutations
  sizes <- lengths(pathways)
  par(mfrow = c(1, 1))
  plot(sizes, nes.pathways, xlab = "Pathway size", ylab = "Pathway NES", pch = 20, main = contrast)
  points(sizes[tpathways], nes.pathways[tpathways], pch = 20, col = "red3")
  grid()
  legend("topright", legend = "Sent to hpGSEA", col = "red3", pch = 20, bg = "white")

  filename <- paste0("results/nes.tpathways_", contrast, ".rda")
  if (!file.exists(filename)) {
    nes.tpathways <- setNames(rep(0, length(tpathways)), tpathways)
    pb <- txtProgressBar(0, length(tpathways), style = 3)
    i <- 0
    for (pname in tpathways) {
      obj <- gsea(reflist = signature, set = pathways[[pname]], method = "pareto", np = 1000)
      nes.tpathways[pname] <- obj$nes
      setTxtProgressBar(pb, i <- i + 1)
    }
    close(pb)
    save(nes.tpathways, file = filename)
  } else {
    load(filename)
  }

  ## Output the top results
  # Little table, strongest |NES| first
  nes.tpathways <- nes.tpathways[names(sort(abs(nes.tpathways), decreasing = TRUE))]
  pvalues <- z2p(nes.tpathways)
  # The column is labelled FDR, so request Benjamini-Hochberg explicitly;
  # p.adjust() defaults to the Holm family-wise correction.
  # NOTE(review): the correction only sees the re-scored top pathways, not
  # every pathway tested in the first pass.
  toshow <- cbind(nes.tpathways, pvalues, p.adjust(pvalues, method = "BH"))
  colnames(toshow) <- c("NES", "p-value", "FDR")
  write.table(toshow, file = paste0("results/table_", contrast, ".txt"), sep = "\t", quote = FALSE)

  ## Output the top pathway as a GSEA plot
  top <- names(nes.tpathways)[1]
  obj <- gsea(reflist = signature, set = pathways[[top]], method = "pareto", np = 1000)
  plot_gsea(
    obj,
    bottomYtitle = "", title = paste0(top, " in ", contrast),
    correctEntrez = TRUE, ext_nes = nes.tpathways[top]
  )
}
## Export one DiffBind report per contrast, read the reports back in, convert
## them to GRanges and annotate every peak with the genes it overlaps.
## Requires `dbaobj`, `lborder` and `rborder` to exist in the session.
contrasts <- c("contrast45vs00", "contrast90vs00", "contrast90vs45")
# Position of each contrast inside the DiffBind object.
# NOTE(review): 24/17/15 are copied from the original script -- verify them
# against the contrast listing of `dbaobj`.
contrast_index <- c(contrast45vs00 = 24, contrast90vs00 = 17, contrast90vs45 = 15)
# Report files are derived from the contrast names so that file i always
# belongs to contrast i. The original listed the files in the reverse order of
# `contrasts` and then named the tables with `contrasts`, so the 90vs45 data
# ended up labelled 45vs00 and vice versa.
filelist <- setNames(
  paste0("results/Report_", contrasts, "_Unnormalized.csv"),
  contrasts
)
for (contrast in contrasts) {
  write.csv(
    dba.report(
      dbaobj,
      contrast = contrast_index[[contrast]], method = DBA_DESEQ2,
      bNormalized = FALSE, bCounts = TRUE, th = 1
    ),
    filelist[[contrast]]
  )
}
contrast.names <- contrasts
# Read the reports back. The original removed only the last report with a
# stray unlink(i); that call is dropped, so all report files stay on disk.
raws <- lapply(filelist, read.delim, as.is = TRUE, sep = ",")
names(raws) <- contrast.names
# Transform into GRanges
grs <- lapply(raws, GRanges)
# Gene Annotation Object
annotation <- toGRanges(TxDb.Hsapiens.UCSC.hg19.knownGene, feature = "gene")
# Annotate: keep the first peak reported for each gene and index peaks by gene.
# (The annotation loop was pasted twice in the original; one pass is enough.)
annos <- lapply(grs, function(gr) {
  anno <- annotatePeakInBatch(
    gr,
    AnnotationData = annotation, output = "overlapping",
    FeatureLocForDistance = "TSS", bindingRegion = c(lborder, rborder)
  )
  anno <- anno[!duplicated(anno$feature)]
  names(anno) <- anno$feature
  anno
})
# Pathway enrichment for every contrast stored in `annos`.
# For each contrast: build the signed peak signature, score all pathways with
# a quick GSEA pass (100 permutations), re-score the strongest 500 with 1000
# permutations, write a result table and plot the top pathway. Both scoring
# passes are cached as .rda files under results/.
for (contrast in names(annos)) {
  signature <- vulcan.signature(annos, contrast = contrast)

  ### Running GSEA
  # Enrichment of every pathway in the comparison, as NES
  # (Normalized Enrichment Score)
  filename <- paste0("results/nes.pathways_", contrast, ".rda")
  if (!file.exists(filename)) {
    nes.pathways <- setNames(rep(0, length(pathways)), names(pathways))
    pb <- txtProgressBar(0, length(pathways), style = 3)
    i <- 0
    for (pname in names(pathways)) {
      obj <- gsea(reflist = signature, set = pathways[[pname]], method = "pareto", np = 100)
      nes.pathways[pname] <- obj$nes
      setTxtProgressBar(pb, i <- i + 1)
    }
    close(pb)
    save(nes.pathways, file = filename)
  } else {
    load(filename)
  }

  ## Recompute the top 500 with more permutations.
  # head() rather than [1:500]: with fewer than 500 pathways the index form
  # pads the selection with NA names.
  tpathways <- names(head(sort(abs(nes.pathways), decreasing = TRUE), 500))

  ## Relationship between pathway size and NES
  ## Highlight the top selected for further permutations
  sizes <- lengths(pathways)
  par(mfrow = c(1, 1))
  plot(sizes, nes.pathways, xlab = "Pathway size", ylab = "Pathway NES", pch = 20, main = contrast)
  points(sizes[tpathways], nes.pathways[tpathways], pch = 20, col = "red3")
  grid()
  legend("topright", legend = "Sent to hpGSEA", col = "red3", pch = 20, bg = "white")

  filename <- paste0("results/nes.tpathways_", contrast, ".rda")
  if (!file.exists(filename)) {
    nes.tpathways <- setNames(rep(0, length(tpathways)), tpathways)
    pb <- txtProgressBar(0, length(tpathways), style = 3)
    i <- 0
    for (pname in tpathways) {
      obj <- gsea(reflist = signature, set = pathways[[pname]], method = "pareto", np = 1000)
      nes.tpathways[pname] <- obj$nes
      setTxtProgressBar(pb, i <- i + 1)
    }
    close(pb)
    save(nes.tpathways, file = filename)
  } else {
    load(filename)
  }

  ## Output the top results
  # Little table, strongest |NES| first
  nes.tpathways <- nes.tpathways[names(sort(abs(nes.tpathways), decreasing = TRUE))]
  pvalues <- z2p(nes.tpathways)
  # The column is labelled FDR, so request Benjamini-Hochberg explicitly;
  # p.adjust() defaults to the Holm family-wise correction.
  # NOTE(review): the correction only sees the re-scored top pathways, not
  # every pathway tested in the first pass.
  toshow <- cbind(nes.tpathways, pvalues, p.adjust(pvalues, method = "BH"))
  colnames(toshow) <- c("NES", "p-value", "FDR")
  write.table(toshow, file = paste0("results/table_", contrast, ".txt"), sep = "\t", quote = FALSE)

  ## Output the top pathway as a GSEA plot
  top <- names(nes.tpathways)[1]
  obj <- gsea(reflist = signature, set = pathways[[top]], method = "pareto", np = 1000)
  plot_gsea(
    obj,
    bottomYtitle = "", title = paste0(top, " in ", contrast),
    correctEntrez = TRUE, ext_nes = nes.tpathways[top]
  )
}
library(vulcan)
### Number of tested pathways
load("msigdb/MSigDB_v5.0_human.rda")
raw <- msigDBentrez$c2.all.v5.0.entrez.gmt
# Only gene sets whose name STARTS with one of the pathway-database prefixes.
# The original pattern was unanchored, so "ST_" / "PID_" / "KEGG_" also
# matched in the middle of unrelated set names (e.g. "..._BREAST_...") and
# inflated the universe used by the Fisher test.
universe <- grep("^(BIOCARTA|REACTOME|KEGG|PID|ST)_", names(raw), value = TRUE)

### TF Comparison: VULCAN vs. GREAT
load("results/vobj_tcga.rda")
contrasts <- c("45vs00", "90vs00")
# NOTE(review): the files read below are named without the "contrast" prefix
# that appears in other cache-file names in this script -- confirm they exist.
for (contrast in contrasts) {
  # Our GSEA pathways
  load(paste0("results/nes.tpathways_", contrast, ".rda")) # nes.tpathways
  # GREAT pathways
  rawgreat <- read.delim(
    paste0("methodComparison/great_", contrast, "_p1_UP.tsv"),
    as.is = TRUE, skip = 3
  )
  rawgreat <- rawgreat[rawgreat[, 1] == "MSigDB Pathway", ]
  great <- setNames(rawgreat$BinomBonfP, rawgreat[, 2])
  great <- great[great < 0.1]

  ### Comparison GREAT/VULCAN
  vsig <- nes.tpathways[z2p(nes.tpathways) < 0.1]
  # Keep both hit lists inside the tested universe so that the four cells of
  # the contingency table partition it (and the last cell cannot go negative).
  # NOTE(review): `vsig` is not filtered by NES sign although the GREAT file
  # and the plot title refer to upregulated pathways -- confirm intent.
  vulcan_hits <- intersect(names(vsig), universe)
  great_hits <- intersect(names(great), universe)
  venn(list(VULCAN = vulcan_hits, GREAT = great_hits))
  title(paste0("Shared upregulated pathways at ", contrast))

  n_shared <- length(intersect(vulcan_hits, great_hits))
  n_vulcan_only <- length(setdiff(vulcan_hits, great_hits))
  n_great_only <- length(setdiff(great_hits, vulcan_hits))
  n_neither <- length(universe) - n_shared - n_vulcan_only - n_great_only
  ctable <- rbind(c(n_shared, n_vulcan_only), c(n_great_only, n_neither))
  fp <- signif(fisher.test(ctable)$p.value, 4)
  mtext(paste0("FET p-value: ", fp))
  # Print the "in neither list" count onto the Venn diagram.
  text(100, 3, ctable[2, 2])
  common <- intersect(names(nes.tpathways), names(great))
}
library(vulcan)
library(vulcan)
library(vulcan)
library(vulcan)
library(vulcan)
library(vulcan)
library(vulcan)
library(vulcan)
library(vulcan)
library(vulcan)
library(vulcan)
library(vulcan)
library(vulcan)
library(vulcan)
library(vulcan)
library(vulcan)
# Regulatory network. `wass` holds the names of the objects that load() restored.
# NOTE(review): absolute, user-specific paths -- this only runs on the
# original author's machine; presumably this file provides `regulon`.
wass <- load("/Users/giorgi01/Dropbox/rstudio/LungCancerCure/shared/networks/aracne-bronchial-tfs.rda")
wass
# Pathways
wass <- load("/Users/giorgi01/Dropbox/rstudio/lists/MSigDB_v5.0_human.rda") # "msigDBentrez" "msigDBsymbol"
wass
pathways <- msigDBentrez$c2.cp.biocarta.v5.0.entrez.gmt
### Hypergeometric test implementation
# phyper() is a function of the base `stats` package, not a package of its
# own, so the original library(phyper) / install.packages("phyper") calls
# could only fail and have been removed, along with the repeated loads.
names(pathways)
### Testing
library(org.Hs.eg.db)
# Entrez ID -> gene symbol lookup, restricted to the keys that have a mapping.
list_eg2symbol <- as.list(
  org.Hs.egSYMBOL[mappedkeys(org.Hs.egSYMBOL)]
)
# Convert Entrez gene IDs to gene symbols using the global `list_eg2symbol`.
#
# ids: vector of Entrez IDs (coerced to character).
# Returns a character vector named by `ids`. IDs without a mapping are
# returned as "unknown.<id>"; an ID that already carries the "unknown."
# prefix is stripped back to the bare ID rather than being prefixed twice.
#
# `list_eg2symbol` is a list, and is.na() never flags the NULL entries that
# list subsetting yields for missing keys, so unmapped IDs used to come back
# as the string "NULL" and the names were lost. Each hit is now reduced to a
# single string before the missing-value check.
e2s <- function(ids) {
  ids <- as.character(ids)
  hits <- list_eg2symbol[ids]
  symbols <- vapply(
    hits,
    function(hit) {
      if (length(hit) == 0L || is.na(hit[[1L]])) {
        NA_character_
      } else {
        as.character(hit[[1L]])
      }
    },
    character(1),
    USE.NAMES = FALSE
  )
  unmapped <- is.na(symbols)
  symbols[unmapped] <- paste0("unknown.", ids[unmapped])
  symbols <- gsub("unknown.unknown.", "", symbols)
  names(symbols) <- ids
  symbols
}
# Gene symbol -> Entrez ID lookup, restricted to the keys that have a mapping.
list_symbol2eg <- as.character(
  org.Hs.egSYMBOL2EG[mappedkeys(org.Hs.egSYMBOL2EG)]
)
# Convert gene symbols to Entrez IDs using the global `list_symbol2eg`.
#
# ids: vector of gene symbols (coerced to character).
# Returns the looked-up values named by `ids`; symbols without a mapping are
# returned as "unknown.<symbol>", and a doubled "unknown.unknown." prefix is
# removed altogether.
s2e <- function(ids) {
  keys <- as.character(ids)
  entrez <- list_symbol2eg[keys]
  names(entrez) <- keys
  unmapped <- is.na(entrez)
  entrez[unmapped] <- paste0("unknown.", keys[unmapped])
  gsub("unknown.unknown.", "", entrez)
}
# Hypergeometric test: is one pathway over-represented among the positively
# regulated targets of one transcription factor?
# Requires `pathways`, `regulon`, `msigDBentrez` and s2e() in the session.
names(pathways)
pathway <- pathways[["BIOCARTA_GATA3_PATHWAY"]]
tf <- "GATA3"
# Targets of the TF, split by the sign of their mode of regulation.
tfmode <- regulon[[s2e(tf)]]$tfmode
networkUP <- names(tfmode)[tfmode >= 0]
networkDN <- names(tfmode)[tfmode < 0]
networkUP
networkDN
# Gene universe: every gene in the complete C2 collection. The original tried
# two other universes first and overwrote them; only the last one is kept.
ngenes <- length(unique(unlist(msigDBentrez$c2.all.v5.0.entrez.gmt)))
ngenes
# Define the inputs BEFORE the test; the original first called phyper() with
# `overlap`, `list1` and `list2` still undefined.
list1 <- length(networkUP)
list2 <- length(pathway)
overlap <- length(intersect(pathway, networkUP))
# P(X >= overlap): hence q = overlap - 1 with the upper tail.
q <- overlap - 1
m <- list1
n <- ngenes - list1
k <- list2
phyper(q, m, n, k, lower.tail = FALSE, log.p = FALSE)
overlap
library(vulcan)
library(vulcan)
library(vulcan)
# Interactive attempt to install csaw: try the current repositories, let the
# user pick a different repository set, then try again.
# NOTE(review): csaw is presumably a Bioconductor package, which would explain
# why the first attempt had to be repeated -- confirm. Installation commands
# like these should not stay in an analysis script.
install.packages("csaw")
# Prompts for the repositories install.packages() should use (interactive).
setRepositories()
install.packages("csaw")
