# Libraries
library(dplyr)
library(tibble)
library(DESeq2)
library(EnsDb.Hsapiens.v86)
library(plyr)
library(airway)
data(airway)
This gist shows a method for automatically running enrichr on a list of gene vectors.
This is an exceedingly common use case for DGE analysis which yields over-expressed and under-expressed genes. It is also common for integrative analyses which explore multiple combinations of gene lists and can easily require > 16 pathway enrichment analyses.
Enrichr rapidly performs pathway enrichrment using a robust statistical approach. However, enrichr is usually performed manually using their web interface or with long-running analysis tasks via the enrichR
R package (link). These approaches are unsuitable for notebooks which can change and require many different enrichment analyses. Instead, it is advantageous to use the web API via the httr
package to generate permalinks and then automatically include them in the RMarkdown notebook. Please see the following example for further guidance:
To begin, we use the example data from the airway
R package here. We perform differential gene expression analysis using DESeq2
and wrangle the results with dplyr
, tibble
, and plyr
to get a named list of over-expressed and under-expressed genes.
<- DESeqDataSet(airway, design = ~ dex) %>% # Sets up DESeq dataset
geneLst DESeq() %>% # Run analysis
results(contrast = c("dex", "trt", "untrt")) %>% # Collect results
as.data.frame() %>% # Convert to dataframe
rownames_to_column(var = "GENEID") %>% # Genes are now a column
::filter(padj < .05) %>% # Filter for significant results
dplyrmutate(result = case_when( # Labels over-expressed vs under-expressed genes
> 0 ~ "Over-expressed",
log2FoldChange TRUE ~ "Under-expressed"
%>%
)) inner_join( # Add in the gene symbols
::select(EnsDb.Hsapiens.v86,
AnnotationDbikeys=keys(EnsDb.Hsapiens.v86),
columns = "SYMBOL"),
by = "GENEID"
%>%
) group_by(result) %>% # Group by result column
setNames(group_split(.), group_keys(.)[[1]])} %>% # Split tibble into list by group with names
{llply(pull, var = SYMBOL) # Final conversion to named list of gene vectors
# Display number of genes in each group
llply(geneLst, length)
## $`Over-expressed`
## [1] 1524
##
## $`Under-expressed`
## [1] 1133
We then perform pathway enrichrment automatically using the code below:
<- llply(names(geneLst), function(groupNow) {
resRmd <- geneLst[[groupNow]]
genesNow <- httr::POST( # Post request to enrichr based on https://maayanlab.cloud/Enrichr/help#api&q=1
response url = 'https://maayanlab.cloud/Enrichr/addList',
body = list(
'list' = paste0(genesNow, collapse = "\n"),
'description' = groupNow
)
)<- jsonlite::fromJSON(httr::content(response, as = "text")) # Collect response
response <- paste0("https://maayanlab.cloud/Enrichr/enrich?dataset=", # Create permalink
permalink $shortId[1])
response# See this for more guidance: https://bookdown.org/yihui/rmarkdown-cookbook/child-document.html
::knit_child(text = c( # Text vector to be knitted into RMarkdown as a child
knitr'### `r groupNow`',
'',
'Enrichr Link: <a href="`r permalink`" target="_blank">`r groupNow`</a>.',
''
), envir = environment(), # Current global environment will be passed into the RMarkdown child
quiet = TRUE)
})cat(unlist(resRmd), sep = '\n')
Enrichr Link: Over-expressed.
Enrichr Link: Under-expressed.
This gist provides an example of differential gene expression in R with pathway enrichment. I have shown you how dplyr
, tibble
, and plyr
can be leveraged to streamline the process of running DESeq2 and how httr
can be used to quickly perform pathway enrichrment via the enrichr web service API.
Ways to improve this approach:
enrichR
R package in order to make visualizations of the top pathways of interest.DT
R package to make user-friendly HTML tables for showing DGE results.sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] airway_1.12.0 plyr_1.8.6
## [3] EnsDb.Hsapiens.v86_2.99.0 ensembldb_2.16.2
## [5] AnnotationFilter_1.16.0 GenomicFeatures_1.44.0
## [7] AnnotationDbi_1.54.1 DESeq2_1.32.0
## [9] SummarizedExperiment_1.22.0 Biobase_2.52.0
## [11] MatrixGenerics_1.4.0 matrixStats_0.59.0
## [13] GenomicRanges_1.44.0 GenomeInfoDb_1.28.1
## [15] IRanges_2.26.0 S4Vectors_0.30.0
## [17] BiocGenerics_0.38.0 tibble_3.1.2
## [19] dplyr_1.0.7
##
## loaded via a namespace (and not attached):
## [1] ProtGenerics_1.24.0 bitops_1.0-7 bit64_4.0.5
## [4] filelock_1.0.2 RColorBrewer_1.1-2 progress_1.2.2
## [7] httr_1.4.2 tools_4.1.0 bslib_0.2.5.1
## [10] utf8_1.2.1 R6_2.5.0 lazyeval_0.2.2
## [13] DBI_1.1.1 colorspace_2.0-2 tidyselect_1.1.1
## [16] prettyunits_1.1.1 bit_4.0.4 curl_4.3.2
## [19] compiler_4.1.0 xml2_1.3.2 DelayedArray_0.18.0
## [22] rtracklayer_1.52.0 sass_0.4.0 scales_1.1.1
## [25] genefilter_1.74.0 rappdirs_0.3.3 Rsamtools_2.8.0
## [28] stringr_1.4.0 digest_0.6.27 rmarkdown_2.9
## [31] XVector_0.32.0 pkgconfig_2.0.3 htmltools_0.5.1.1
## [34] dbplyr_2.1.1 fastmap_1.1.0 rlang_0.4.11
## [37] RSQLite_2.2.7 jquerylib_0.1.4 BiocIO_1.2.0
## [40] generics_0.1.0 jsonlite_1.7.2 BiocParallel_1.26.1
## [43] RCurl_1.98-1.3 magrittr_2.0.1 GenomeInfoDbData_1.2.6
## [46] Matrix_1.3-4 Rcpp_1.0.7 munsell_0.5.0
## [49] fansi_0.5.0 lifecycle_1.0.0 stringi_1.7.3
## [52] yaml_2.2.1 zlibbioc_1.38.0 BiocFileCache_2.0.0
## [55] grid_4.1.0 blob_1.2.1 crayon_1.4.1
## [58] lattice_0.20-44 Biostrings_2.60.1 splines_4.1.0
## [61] annotate_1.70.0 hms_1.1.0 KEGGREST_1.32.0
## [64] locfit_1.5-9.4 knitr_1.33 pillar_1.6.1
## [67] rjson_0.2.20 geneplotter_1.70.0 biomaRt_2.48.2
## [70] XML_3.99-0.6 glue_1.4.2 evaluate_0.14
## [73] png_0.1-7 vctrs_0.3.8 gtable_0.3.0
## [76] purrr_0.3.4 assertthat_0.2.1 cachem_1.0.5
## [79] ggplot2_3.3.5 xfun_0.24 xtable_1.8-4
## [82] restfulr_0.0.13 survival_3.2-11 GenomicAlignments_1.28.0
## [85] memoise_2.0.0 ellipsis_0.3.2