Results from the univariate regressions performed using can be combined in a post-processing step to perform multivariate hypothesis testing. In this example, we fit on transcript-level counts and then perform multivariate hypothesis testing by combining transcripts at the gene-level. This is done with the function.
Read in transcript counts from the package.
library(readr)
library(tximport)
library(tximportData)
# specify directory
path = system.file("extdata", package="tximportData")
# read sample meta-data
samples = read.table(file.path(path,"samples.txt"), header=TRUE)
samples.ext = read.table(file.path(path,"samples_extended.txt"), header=TRUE, sep="\t")
# read assignment of transcripts to genes
# remove genes on the PAR, since these are present twice
tx2gene = read_csv(file.path(path, "tx2gene.gencode.v27.csv"))
tx2gene = tx2gene[grep("PAR_Y", tx2gene$GENEID, invert=TRUE),]
# read transcript-level quatifictions
files = file.path(path, "salmon", samples$run, "quant.sf.gz")
txi = tximport(files, type = "salmon", txOut=TRUE)
# Create metadata simulating two conditions
sampleTable = data.frame(condition = factor(rep(c("A", "B"), each = 3)))
rownames(sampleTable) = paste0("Sample", 1:6)
Perform standard analysis at the transcript-level
library(variancePartition)
library(edgeR)
# Prepare transcript-level reads
dge = DGEList(txi$counts)
design <- model.matrix(~condition, data = sampleTable)
isexpr = filterByExpr(dge, design)
dge = dge[isexpr,]
dge = calcNormFactors(dge)
# Estimate precision weights
vobj = voomWithDreamWeights(dge, ~ condition, sampleTable)
# Fit regression model one transcript at a time
fit = dream(vobj, ~ condition, sampleTable)
fit = eBayes(fit)
Combine the transcript-level results at the gene-level. The mapping between transcript and gene is stored in as a list.
# Prepare transcript to gene mapping
# keep only transcripts present in vobj
# then convert to list with key GENEID and values TXNAMEs
keep = tx2gene$TXNAME %in% rownames(vobj)
tx2gene.lst = unstack(tx2gene[keep,])
# Run multivariate test on entries in each feature set
res = mvTest(fit, vobj, tx2gene.lst, coef="conditionB")
# truncate gene names since they have version numbers
# ENST00000498289.5 -> ENST00000498289
res$ID.short = gsub("\\..+", "", res$ID)
Perform gene set analysis using on the gene-level test statistics.
# must have zenith > v1.0.2
library(zenith)
library(GSEABase)
gs = get_MSigDB("C1", to="ENSEMBL")
df_gsa = zenithPR_gsa( res$stat, res$ID.short, gs, inter.gene.cor=.05)
head(df_gsa)
## NGenes Correlation delta se p.less p.greater PValue
## M652_chr5q35 81 0.05 -9558.4102 1689.015 7.737973e-09 1.0000000 1.547595e-08
## M12945_chr12q21 35 0.05 -1506.2074 1897.386 2.136523e-01 0.7863477 4.273046e-01
## M5824_chr11p13 30 0.05 -988.0642 1952.248 3.063911e-01 0.6936089 6.127822e-01
## M1556_chr20q11 92 0.05 775.2733 1678.101 6.779541e-01 0.3220459 6.440917e-01
## M13160_chr16p11 105 0.05 -519.6880 1660.274 3.771373e-01 0.6228627 7.542747e-01
## M6742_chr2q36 20 0.05 -575.4590 2133.016 3.936640e-01 0.6063360 7.873280e-01
## Direction FDR
## M652_chr5q35 Down 3.838035e-06
## M12945_chr12q21 Down 9.970286e-01
## M5824_chr11p13 Down 9.970286e-01
## M1556_chr20q11 Up 9.970286e-01
## M13160_chr16p11 Down 9.970286e-01
## M6742_chr2q36 Down 9.970286e-01
## R version 4.3.0 RC (2023-04-13 r84269)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.2 LTS
##
## Matrix products: default
## BLAS: /home/biocbuild/bbs-3.17-bioc/R/lib/libRblas.so
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_GB
## [4] LC_COLLATE=C LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C
## [10] LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: America/New_York
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] org.Hs.eg.db_3.17.0 msigdbr_7.5.1 GSEABase_1.62.0
## [4] graph_1.78.0 annotate_1.78.0 XML_3.99-0.14
## [7] AnnotationDbi_1.62.1 IRanges_2.34.0 S4Vectors_0.38.1
## [10] Biobase_2.60.0 BiocGenerics_0.46.0 zenith_1.2.0
## [13] tximportData_1.28.0 tximport_1.28.0 readr_2.1.4
## [16] edgeR_3.42.4 pander_0.6.5 variancePartition_1.30.2
## [19] BiocParallel_1.34.2 limma_3.56.2 ggplot2_3.4.2
## [22] knitr_1.43
##
## loaded via a namespace (and not attached):
## [1] jsonlite_1.8.5 magrittr_2.0.3 farver_2.1.1
## [4] nloptr_2.0.3 rmarkdown_2.22 zlibbioc_1.46.0
## [7] vctrs_0.6.2 memoise_2.0.1 minqa_1.2.5
## [10] RCurl_1.98-1.12 S4Arrays_1.0.4 htmltools_0.5.5
## [13] progress_1.2.2 curl_5.0.0 broom_1.0.4
## [16] sass_0.4.6 KernSmooth_2.23-21 bslib_0.4.2
## [19] pbkrtest_0.5.2 plyr_1.8.8 cachem_1.0.8
## [22] lifecycle_1.0.3 iterators_1.0.14 pkgconfig_2.0.3
## [25] Matrix_1.5-4.1 R6_2.5.1 fastmap_1.1.1
## [28] MatrixGenerics_1.12.0 GenomeInfoDbData_1.2.10 rbibutils_2.2.13
## [31] digest_0.6.31 numDeriv_2016.8-1.1 colorspace_2.1-0
## [34] GenomicRanges_1.52.0 RSQLite_2.3.1 filelock_1.0.2
## [37] labeling_0.4.2 RcppZiggurat_0.1.6 clusterGeneration_1.3.7
## [40] fansi_1.0.4 httr_1.4.6 compiler_4.3.0
## [43] bit64_4.0.5 aod_1.3.2 withr_2.5.0
## [46] doParallel_1.0.17 backports_1.4.1 DBI_1.1.3
## [49] highr_0.10 gplots_3.1.3 MASS_7.3-60
## [52] DelayedArray_0.26.3 gtools_3.9.4 caTools_1.18.2
## [55] tools_4.3.0 remaCor_0.0.11 glue_1.6.2
## [58] nlme_3.1-162 grid_4.3.0 reshape2_1.4.4
## [61] generics_0.1.3 snow_0.4-4 gtable_0.3.3
## [64] tzdb_0.4.0 tidyr_1.3.0 hms_1.1.3
## [67] utf8_1.2.3 XVector_0.40.0 foreach_1.5.2
## [70] pillar_1.9.0 stringr_1.5.0 babelgene_22.9
## [73] vroom_1.6.3 splines_4.3.0 dplyr_1.1.2
## [76] BiocFileCache_2.8.0 lattice_0.21-8 bit_4.0.5
## [79] tidyselect_1.2.0 locfit_1.5-9.7 Biostrings_2.68.1
## [82] SummarizedExperiment_1.30.2 RhpcBLASctl_0.23-42 xfun_0.39
## [85] statmod_1.5.0 matrixStats_1.0.0 KEGGgraph_1.60.0
## [88] stringi_1.7.12 yaml_2.3.7 boot_1.3-28.1
## [91] evaluate_0.21 codetools_0.2-19 archive_1.1.5
## [94] tibble_3.2.1 Rgraphviz_2.44.0 cli_3.6.1
## [97] xtable_1.8-4 Rdpack_2.4 munsell_0.5.0
## [100] jquerylib_0.1.4 Rcpp_1.0.10 GenomeInfoDb_1.36.0
## [103] dbplyr_2.3.2 png_0.1-8 Rfast_2.0.7
## [106] RUnit_0.4.32 parallel_4.3.0 blob_1.2.4
## [109] prettyunits_1.1.1 bitops_1.0-7 lme4_1.1-33
## [112] mvtnorm_1.2-1 lmerTest_3.1-3 scales_1.2.1
## [115] purrr_1.0.1 crayon_1.5.2 rlang_1.1.1
## [118] EnrichmentBrowser_2.30.1 KEGGREST_1.40.0
<>