# Libraries
library(dplyr)
library(tibble)
library(DESeq2)
library(EnsDb.Hsapiens.v86)
library(plyr)
library(airway)
data(airway)

Example auto-enrichr analysis

This gist shows a method for automatically running enrichr on a list of gene vectors.

This is an exceedingly common use case for DGE analysis which yields over-expressed and under-expressed genes. It is also common for integrative analyses which explore multiple combinations of gene lists and can easily require > 16 pathway enrichment analyses.

Enrichr rapidly performs pathway enrichrment using a robust statistical approach. However, enrichr is usually performed manually using their web interface or with long-running analysis tasks via the enrichR R package (link). These approaches are unsuitable for notebooks which can change and require many different enrichment analyses. Instead, it is advantageous to use the web API via the httr package to generate permalinks and then automatically include them in the RMarkdown notebook. Please see the following example for further guidance:

Running DESeq2

To begin, we use the example data from the airway R package here. We perform differential gene expression analysis using DESeq2 and wrangle the results with dplyr, tibble, and plyr to get a named list of over-expressed and under-expressed genes.

geneLst <- DESeqDataSet(airway, design = ~ dex) %>%  # Sets up DESeq dataset
  DESeq() %>%  # Run analysis
  results(contrast = c("dex", "trt", "untrt")) %>%  # Collect results
  as.data.frame() %>%  # Convert to dataframe
  rownames_to_column(var = "GENEID") %>%  # Genes are now a column
  dplyr::filter(padj < .05) %>%  # Filter for significant results
  mutate(result = case_when(  # Labels over-expressed vs under-expressed genes
    log2FoldChange > 0 ~ "Over-expressed", 
    TRUE ~ "Under-expressed"
  )) %>%
  inner_join(  # Add in the gene symbols
    AnnotationDbi::select(EnsDb.Hsapiens.v86,
                          keys=keys(EnsDb.Hsapiens.v86),
                          columns = "SYMBOL"),
    by = "GENEID"
  ) %>%
  group_by(result) %>%  # Group by result column
  {setNames(group_split(.), group_keys(.)[[1]])} %>%  # Split tibble into list by group with names
  llply(pull, var = SYMBOL)  # Final conversion to named list of gene vectors
# Display number of genes in each group
llply(geneLst, length)
## $`Over-expressed`
## [1] 1524
## 
## $`Under-expressed`
## [1] 1133

Pathway enrichment

We then perform pathway enrichrment automatically using the code below:

resRmd <- llply(names(geneLst), function(groupNow) {
  genesNow <- geneLst[[groupNow]]
  response <- httr::POST(  # Post request to enrichr based on https://maayanlab.cloud/Enrichr/help#api&q=1
    url = 'https://maayanlab.cloud/Enrichr/addList', 
    body = list(
      'list' = paste0(genesNow, collapse = "\n"),
      'description' = groupNow
      )
    )
  response <- jsonlite::fromJSON(httr::content(response, as = "text"))  # Collect response
  permalink <- paste0("https://maayanlab.cloud/Enrichr/enrich?dataset=",  # Create permalink
                      response$shortId[1])
  # See this for more guidance: https://bookdown.org/yihui/rmarkdown-cookbook/child-document.html
  knitr::knit_child(text = c(  # Text vector to be knitted into RMarkdown as a child
    '### `r groupNow`',
    '',
    'Enrichr Link: <a href="`r permalink`" target="_blank">`r groupNow`</a>.',
    ''
  ), 
  envir = environment(),  # Current global environment will be passed into the RMarkdown child
  quiet = TRUE)
})
cat(unlist(resRmd), sep = '\n')

Over-expressed

Enrichr Link: Over-expressed.

Under-expressed

Enrichr Link: Under-expressed.

Conclusions

This gist provides an example of differential gene expression in R with pathway enrichment. I have shown you how dplyr, tibble, and plyr can be leveraged to streamline the process of running DESeq2 and how httr can be used to quickly perform pathway enrichrment via the enrichr web service API.

Ways to improve this approach:

  1. Instead of using text, consider using a child Rmd template. See an example here.
  2. Consider using the enrichR R package in order to make visualizations of the top pathways of interest.
  3. Consider using the DT R package to make user-friendly HTML tables for showing DGE results.
  4. Consider including visualizations, such as PCA, MA plots, heatmaps, and volcano plots as in this tutorial.

Session info

sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] airway_1.12.0               plyr_1.8.6                 
##  [3] EnsDb.Hsapiens.v86_2.99.0   ensembldb_2.16.2           
##  [5] AnnotationFilter_1.16.0     GenomicFeatures_1.44.0     
##  [7] AnnotationDbi_1.54.1        DESeq2_1.32.0              
##  [9] SummarizedExperiment_1.22.0 Biobase_2.52.0             
## [11] MatrixGenerics_1.4.0        matrixStats_0.59.0         
## [13] GenomicRanges_1.44.0        GenomeInfoDb_1.28.1        
## [15] IRanges_2.26.0              S4Vectors_0.30.0           
## [17] BiocGenerics_0.38.0         tibble_3.1.2               
## [19] dplyr_1.0.7                
## 
## loaded via a namespace (and not attached):
##  [1] ProtGenerics_1.24.0      bitops_1.0-7             bit64_4.0.5             
##  [4] filelock_1.0.2           RColorBrewer_1.1-2       progress_1.2.2          
##  [7] httr_1.4.2               tools_4.1.0              bslib_0.2.5.1           
## [10] utf8_1.2.1               R6_2.5.0                 lazyeval_0.2.2          
## [13] DBI_1.1.1                colorspace_2.0-2         tidyselect_1.1.1        
## [16] prettyunits_1.1.1        bit_4.0.4                curl_4.3.2              
## [19] compiler_4.1.0           xml2_1.3.2               DelayedArray_0.18.0     
## [22] rtracklayer_1.52.0       sass_0.4.0               scales_1.1.1            
## [25] genefilter_1.74.0        rappdirs_0.3.3           Rsamtools_2.8.0         
## [28] stringr_1.4.0            digest_0.6.27            rmarkdown_2.9           
## [31] XVector_0.32.0           pkgconfig_2.0.3          htmltools_0.5.1.1       
## [34] dbplyr_2.1.1             fastmap_1.1.0            rlang_0.4.11            
## [37] RSQLite_2.2.7            jquerylib_0.1.4          BiocIO_1.2.0            
## [40] generics_0.1.0           jsonlite_1.7.2           BiocParallel_1.26.1     
## [43] RCurl_1.98-1.3           magrittr_2.0.1           GenomeInfoDbData_1.2.6  
## [46] Matrix_1.3-4             Rcpp_1.0.7               munsell_0.5.0           
## [49] fansi_0.5.0              lifecycle_1.0.0          stringi_1.7.3           
## [52] yaml_2.2.1               zlibbioc_1.38.0          BiocFileCache_2.0.0     
## [55] grid_4.1.0               blob_1.2.1               crayon_1.4.1            
## [58] lattice_0.20-44          Biostrings_2.60.1        splines_4.1.0           
## [61] annotate_1.70.0          hms_1.1.0                KEGGREST_1.32.0         
## [64] locfit_1.5-9.4           knitr_1.33               pillar_1.6.1            
## [67] rjson_0.2.20             geneplotter_1.70.0       biomaRt_2.48.2          
## [70] XML_3.99-0.6             glue_1.4.2               evaluate_0.14           
## [73] png_0.1-7                vctrs_0.3.8              gtable_0.3.0            
## [76] purrr_0.3.4              assertthat_0.2.1         cachem_1.0.5            
## [79] ggplot2_3.3.5            xfun_0.24                xtable_1.8-4            
## [82] restfulr_0.0.13          survival_3.2-11          GenomicAlignments_1.28.0
## [85] memoise_2.0.0            ellipsis_0.3.2