16 de mayo de 2017

Introduction

  • No tool written in a data-analysis-oriented language currently makes data retrieval from RegulonDB easy and automatic.
  • Here, we present Regutools, an R package that facilitates data extraction from RegulonDB.

Exploring the database

library(regutools)
## Loading required package: RSQLite
## Loading required package: httr
## Loading required package: XML
ListDatasets()
## [1] "GENE"              "NETWORK"           "OPERON"           
## [4] "REGULONDB_OBJECTS" "TF"                "TU"

Columns of a table

head(ListAttributes("GENE"))
##         column_name
## 1          posright
## 2           gene_tf
## 3      product_name
## 4 celullar_location
## 5      dna_sequence
## 6      sigma_factor

Column description

head(ListAttributes("GENE",
               comments = TRUE))
##         column_name
## 1          posright
## 2           gene_tf
## 3      product_name
## 4 celullar_location
## 5      dna_sequence
## 6      sigma_factor
##                                                       comments
## 1                               Absolute Right genome position
## 2          Transcription Factor Name(s) that regulate the gene
## 3                                                 Product Name
## 4 Location in the cell in which a gene product is functional. 
## 5                     Complete nucleotide sequence of the gene
## 6                  Sigma Factor Name(s) that regulate the gene

Retrieving data

head(GetAttr(attributes=c("posright","name"),
        dataset="GENE"))
##   posright name
## 1  4266861  alr
## 2   796551 modB
## 3  2532224 cysZ
## 4  3813951  dfp
## 5  4348744 dcuB
## 6  2425233 hisM

Using filters

GetAttr(attributes=c("posright","name"),
        filters=list("name"=c("araC","crp","lacI")),
        dataset="GENE")
##   posright name
## 1    71265 araC
## 2  3486752  crp
## 3   367510 lacI

Interval filter

GetAttr(attributes=c("posright","name"),
        filters=list("posright"=c(1,5000)),
        interv=c("posright"),
        dataset="GENE")
##   posright name
## 1     2799 thrA
## 2     3733 thrB
## 3      255 thrL

Partial match

head(GetAttr(attributes=c("posright","name"),
        filters=list("name"="ara"),
        partialmatch="name",
        dataset="GENE"))
##   posright name
## 1    68337 araA
## 2    70048 araB
## 3    71265 araC
## 4    66550 araD
## 5  2982182 araE
## 6  1986128 araF

Complex queries

dim(GetAttr(attributes = c("name", "strand", "posright", "product_name"), 
           dataset = "GENE",
           filters = list(name=c("ARA"),
                          product_name=c("Ara"),
                          strand=c("forward"),
                          posright=c("2000","4000000")
           ),
           and=FALSE,
           partialmatch = c("name", "product_name") ,
           interv="posright" ))
## [1] 2285    4

Complex queries

GetAttr(attributes = c("name", "strand", "posright", "product_name"), 
           dataset = "GENE",
           filters = list(name=c("ARA"),
                          product_name=c("Ara"),
                          strand=c("forward"),
                          posright=c("2000","4000000")
           ),
           and=TRUE,
           partialmatch = c("name", "product_name") ,
           interv="posright" )
##   name  strand posright                                    product_name
## 1 araC forward    71265                                            AraC
## 2 barA forward  2917813                   BarA sensory histidine kinase
## 3 marA forward  1619957 MarA DNA-binding transcriptional dual regulator

Gene regulation

GetGeneRegulation(c("araC","fis","crp"))
##   genes regulators effect
## 1   crp        Fis      -
## 2   fis        Fis      -
## 3   fis        IHF      +
## 4  araC        CRP      +
## 5   crp        CRP    +/-
## 6   fis        CRP    +/-
## 7  araC       AraC    +/-
## 8   crp        Cra      +
## 9  araC       XylR      -

Table format

GetGeneRegulation(c("araC","fis","crp"),
                  format="table")
##       Fis  IHF CRP AraC  Cra XylR
## araC <NA> <NA>   +  +/- <NA>    -
## fis     -    + +/- <NA> <NA> <NA>
## crp     - <NA> +/- <NA>    + <NA>

Retrieve whole network

head(GetNetwork())
##   regulator gene effect
## 1       Fis adhE      +
## 2       Fis osmE      -
## 3       Fis ansB      -
## 4       Fis apaG      +
## 5       Fis apaH      +
## 6       Fis bglB      -

TF-TF network

head(GetNetwork(type = "TF-TF"))
##   regulator gene effect
## 1       Fis  Fis      -
## 2       Fis  CRP      -
## 3       Fis NtrC      +
## 4       Fis H-NS      +
## 5       Fis MarA      +
## 6       Fis GlcC      -

Preguntas

¿El gen "celA" existe?

head(GetAttr(attributes=c("name"),
             filters=list("name"="celA"),
             dataset="GENE"))
## Error: Your query produced no results. Try changing values, filters or attributes.
head(GetAttr(attributes=c("name","synonyms"),
             filters=list("synonyms"="celA"),
             partialmatch = "synonyms",
             dataset="GENE"))
##   name   synonyms
## 1 chbB b1738,celA

¿Cuántas TUs hay en un solo operón?

hist(GetAttr(attributes="total_tu",dataset="OPERON")[,1],
     main = NA,
     xlab = "TU's in a single operon")

¿Cuántas TUs hay en un solo operón?

tu<-GetAttr(attributes="total_tu",dataset="OPERON")[,1]
tu<-tu[tu>3]
hist(tu, main = NA,xlab = "TU's in a single operon")

¿Cuántas TUs hay en un solo operón?

GetAttr(attributes=c("name","total_promoter","sigma_name","tf_name"),
        filters = list("total_tu"=12),
        dataset="OPERON")
##                                                          name
## 1 mraZ-rsmH-ftsLI-murEF-mraY-murD-ftsW-murGC-ddlB-ftsQAZ-lpxC
##   total_promoter      sigma_name                  tf_name
## 1             12 Sigma38,Sigma70 LexA,MraZ,PdhR,RcsB,SdiA

¿Cuántas regulaciones existen por efecto?

pie(table(GetNetwork()$effect),
    col=c("green","blue","red"),
    main = "Number of regulations by effect")

Validaciones

GetAttr(attributes = c("name", "stran", "posright", "product_name"), 
           dataset = "GENE",
           filters = list(name=c("ARA"),
                          product_name=c("Ara"),
                          strand=c("forward"),
                          posright=c("2000","4000000")
           ),
           and=FALSE,
           partialmatch = c("name", "product_name") ,
           interv="posright" )
## Error: Provided attribute(s) "stran" do not exist. Please check ListAttributes() function.

Perspectivas

  • Bioconductor
  • Artículo
  • Shiny