This is a routine MEGENA pipeline description encompassing from data correlation analysis to module plotting, and is based on version 1.3.6 https://CRAN.R-project.org/package=MEGENA.Please cite the paper below when MEGENA is applied as part of your analysis:
Song W.-M., Zhang B. (2015) Multiscale Embedded Gene Co-expression Network Analysis. PLoS Comput Biol 11(11): e1004574. doi: 10.1371/journal.pcbi.1004574.
For statistical mechanics aspects involved in MEGENA, please check:
Song W.-M., Di Matteo T.and Aste T., Building Complex Networks with Platonic Solids, Physical Reivew E, 2012 Apr;85(4 Pt 2):046115.
rm(list = ls()) # rm R working space
library(MEGENA)
# input parameters
n.cores <- 2; # number of cores/threads to call for PCP
doPar <-TRUE; # do we want to parallelize?
method = "pearson" # method for correlation. either pearson or spearman.
FDR.cutoff = 0.05 # FDR threshold to define significant correlations upon shuffling samples.
module.pval = 0.05 # module significance p-value. Recommended is 0.05.
hub.pval = 0.05 # connectivity significance p-value based random tetrahedral networks
cor.perm = 10; # number of permutations for calculating FDRs for all correlation pairs.
hub.perm = 100; # number of permutations for calculating connectivity significance p-value.
# annotation to be done on the downstream
annot.table=NULL
id.col = 1
symbol.col= 2
###########
data(Sample_Expression) # load toy example data
ijw <- calculate.correlation(datExpr,doPerm = cor.perm,output.corTable = FALSE,output.permFDR = FALSE)
## i = 1
## i = 2
## i = 3
## i = 4
## i = 5
## i = 6
## i = 7
## i = 8
## i = 9
## i = 10
In this step, Planar Filtered Network (PFN) is calculated by taking significant correlation pairs, ijw. In the case of utilizing a different similarity measure, one can independently format the results into 3-column data frame with column names c(“row”,“col”,“weight”), and make sure the weight column ranges within 0 to 1. Using this as an input to calculate.PFN() will work just as fine.
#### register multiple cores if needed: note that set.parallel.backend() is deprecated.
run.par = doPar & (getDoParWorkers() == 1)
if (run.par)
{
cl <- parallel::makeCluster(n.cores)
registerDoParallel(cl)
# check how many workers are there
cat(paste("number of cores to use:",getDoParWorkers(),"\n",sep = ""))
}
## number of cores to use:2
##### calculate PFN
el <- calculate.PFN(ijw[,1:3],doPar = doPar,num.cores = n.cores,keep.track = FALSE)
## ####### PFN Calculation commences ########
## [1] "343 out of 894 has been included."
## [1] "471 out of 894 has been included."
## [1] "556 out of 894 has been included."
## [1] "Performing parallel quality checks on 2000"
## [1] "Qualified edges: 249"
## [1] "663 out of 894 has been included."
## [1] "710 out of 894 has been included."
## [1] "Performing parallel quality checks on 2000"
## [1] "Qualified edges: 78"
## [1] "759 out of 894 has been included."
## [1] "Performing parallel quality checks on 2000"
## [1] "Qualified edges: 60"
## [1] "802 out of 894 has been included."
## [1] "Performing parallel quality checks on 2000"
## [1] "Qualified edges: 24"
## [1] "820 out of 894 has been included."
## [1] "Performing parallel quality checks on 2000"
## [1] "Qualified edges: 21"
## [1] "839 out of 894 has been included."
## [1] "Performing parallel quality checks on 2000"
## [1] "Qualified edges: 14"
## [1] "850 out of 894 has been included."
## [1] "Performing parallel quality checks on 2000"
## [1] "Qualified edges: 5"
## [1] "853 out of 894 has been included."
## [1] "Performing parallel quality checks on 2000"
## [1] "Qualified edges: 4"
## [1] "857 out of 894 has been included."
## [1] "Performing parallel quality checks on 2000"
## [1] "Qualified edges: 3"
## [1] "860 out of 894 has been included."
## [1] "Performing parallel quality checks on 2000"
## [1] "Qualified edges: 3"
## [1] "863 out of 894 has been included."
## [1] "Performing parallel quality checks on 2000"
## [1] "Qualified edges: 5"
## [1] "867 out of 894 has been included."
## [1] "Performing parallel quality checks on 2000"
## [1] "Qualified edges: 1"
## [1] "868 out of 894 has been included."
## [1] "Performing parallel quality checks on 2000"
## [1] "Qualified edges: 4"
## [1] "872 out of 894 has been included."
## [1] "Performing parallel quality checks on 2000"
## [1] "Qualified edges: 10"
## [1] "875 out of 894 has been included."
## [1] "Performing parallel quality checks on 2000"
## [1] "Qualified edges: 1"
## [1] "876 out of 894 has been included."
## [1] "Performing parallel quality checks on 2000"
## [1] "Qualified edges: 2"
## [1] "878 out of 894 has been included."
## [1] "Performing parallel quality checks on 2000"
## [1] "Qualified edges: 5"
## [1] "881 out of 894 has been included."
## [1] "Performing parallel quality checks on 2000"
## [1] "Qualified edges: 5"
## [1] "884 out of 894 has been included."
## [1] "Performing parallel quality checks on 2000"
## [1] "Qualified edges: 19"
## [1] "888 out of 894 has been included."
## [1] "Performing parallel quality checks on 2000"
## [1] "Qualified edges: 217"
## [1] "894 out of 894 has been included."
## [1] "PFG is complete."
g <- graph.data.frame(el,directed = FALSE)
MCA clustering is performed to identify multiscale clustering analysis. “MEGENA.output”" is the core output to be used in the down-stream analyses for summarization and plotting.
##### perform MCA clustering.
MEGENA.output <- do.MEGENA(g,
mod.pval = module.pval,hub.pval = hub.pval,remove.unsig = TRUE,
min.size = 10,max.size = vcount(g)/2,
doPar = doPar,num.cores = n.cores,n.perm = hub.perm,
save.output = FALSE)
###### unregister cores as these are not needed anymore.
if (getDoParWorkers() > 1)
{
env <- foreach:::.foreachGlobals
rm(list=ls(name=env), pos=env)
}
summary.output <- MEGENA.ModuleSummary(MEGENA.output,
mod.pvalue = module.pval,hub.pvalue = hub.pval,
min.size = 10,max.size = vcount(g)/2,
annot.table = annot.table,id.col = id.col,symbol.col = symbol.col,
output.sig = TRUE)
if (!is.null(annot.table))
{
# update annotation to map to gene symbols
V(g)$name <- paste(annot.table[[symbol.col]][match(V(g)$name,annot.table[[id.col]])],V(g)$name,sep = "|")
summary.output <- output[c("mapped.modules","module.table")]
names(summary.output)[1] <- "modules"
}
print(head(summary.output$modules,2))
## $c1_2
## [1] "C1QB" "C1QA" "SASH3" "HLA-DPA1"
## [5] "PLEK" "NCKAP1L" "EVI2B" "HLA-DRA"
## [9] "FCGR1A" "CD53" "PTPRC" "DOCK2"
## [13] "ITGB2" "FCGR1C" "ARHGAP9" "CD37"
## [17] "CD4" "PTPN7" "HLA-DMB" "CD48"
## [21] "FYB" "CYBB" "IKZF1" "LILRB2"
## [25] "HLA-DQA1" "LILRB4" "IL10RA" "CCR5"
## [29] "HLA-DPB1" "LILRB1" "CORO1A" "SPN"
## [33] "PSTPIP1" "SNX20" "MNDA" "TFEC"
## [37] "FGL2" "CD74" "C1QC" "ITGAX"
## [41] "FERMT3" "CCR1" "PRKCB" "CD1E"
## [45] "HLA-DQB1" "IL12RB1" "NCF1C" "CLEC10A"
## [49] "FPR3" "WDFY4" "SP140" "FCGR3A"
## [53] "CTSS" "BIN2" "SIGLEC7" "TLR8"
## [57] "HLA-DRB1" "IFI30" "GPR183" "NCF2"
## [61] "TMC8" "MRC1" "GIMAP5" "CLEC17A"
## [65] "CD86" "PIK3AP1" "CSF1R" "CD163"
## [69] "P2RY13" "DOK2" "CARD11" "LILRB3"
## [73] "APOC1" "IRF8" "CRTAM" "PARP15"
## [77] "AMICA1" "SRGN" "TNFRSF8" "PLD4"
## [81] "MPEG1" "CD1A" "TMEM176A" "EMB"
## [85] "RCSD1" "FCGR1B" "HCLS1" "HLA-DOA"
## [89] "CIITA" "AOAH" "SLA" "KLHL6"
## [93] "PIK3CG" "CD84" "SIGLEC10" "CD1C"
## [97] "NCF1" "CD226" "PTPN22" "IGSF6"
## [101] "ZBP1" "IL16" "LYZ" "HLA-DRB5"
## [105] "CCR4" "CD209" "CD200R1" "TLR10"
## [109] "SIGLEC1" "LOC100233209" "LILRA5" "MYO1G"
## [113] "CD28" "CYTIP" "PTAFR" "HCK"
## [117] "CSF1" "TLR7" "RAC2" "ITGAL"
## [121] "STAP1" "RASGRP2" "PTPRO" "CD180"
## [125] "P2RY12" "EBI3" "RASSF4" "SIGLEC5"
## [129] "FCN1" "OSCAR" "OLR1" "FCGR2B"
## [133] "NLRP2"
##
## $c1_3
## [1] "CD3E" "CD3D" "CD2" "CXCL11" "ITK"
## [6] "ACAP1" "IL2RG" "PTPRCAP" "SLAMF6" "SH2D1A"
## [11] "SIRPG" "CD5" "NKG7" "TIGIT" "CD96"
## [16] "CD247" "CXCR3" "ICOS" "LY9" "TBC1D10C"
## [21] "UBASH3A" "TAP1" "LCK" "GZMA" "TBX21"
## [26] "GPR174" "CCL5" "CD38" "CXCL10" "C5orf20"
## [31] "IRF4" "ZNF831" "GBP5" "LAX1" "CD27"
## [36] "SLA2" "KLRK1" "GZMB" "BTLA" "HLA-B"
## [41] "SCML4" "HLA-F" "GZMK" "HLA-H" "SLAMF1"
## [46] "LOC400759" "SLAMF7" "ZBED2" "PRF1" "IDO1"
## [51] "CD8B" "IL4I1" "ZAP70" "GVIN1" "FCRL3"
## [56] "FASLG" "APOL1" "PSMB9" "AQP9" "TNFRSF9"
## [61] "CXCL9" "C8orf80" "GBP1" "HK3" "ADAMDEC1"
## [66] "CD80" "CTLA4" "SLAMF8" "TIFAB" "CLEC4D"
## [71] "GPR18" "SELL" "APOL3" "CLEC4E" "PRKCQ"
## [76] "CARD16" "CCL4" "KLRC3" "IL7R" "SIT1"
## [81] "TRAT1" "CD3G" "GPR171" "CD6" "C16orf54"
## [86] "CD8A" "CD40LG" "EOMES" "GZMM" "LTA"
## [91] "MAP4K1" "KIAA0748" "SEPT1" "CD52" "PLA2G2D"
## [96] "HCP5" "MEI1" "CST7" "LTB" "XCL2"
## [101] "AIM2" "NLRC5" "GNLY" "IFNG" "ETV7"
## [106] "CTSW" "ZNF683" "C11orf21" "CD69" "PNOC"
## [111] "IL2RB" "TMEM150B" "CCR8" "LGALS2" "LRMP"
## [116] "BCL11B" "IKZF3" "UBD" "MCART6" "CCL3L1"
## [121] "C15orf56"
print(summary.output$module.table)
## module.id module.size module.parent
## c1_2 c1_2 133 c1_1
## c1_3 c1_3 121 c1_1
## c1_4 c1_4 22 c1_1
## c1_5 c1_5 24 c1_1
## c1_6 c1_6 26 c1_2
## c1_7 c1_7 68 c1_2
## c1_10 c1_10 65 c1_3
## c1_12 c1_12 25 c1_3
## c1_13 c1_13 11 c1_3
## c1_19 c1_19 22 c1_7
## c1_21 c1_21 56 c1_10
## c1_23 c1_23 11 c1_12
## module.hub
## c1_2 SASH3(34),CD53(33),PTPRC(26),CD86(20),NCKAP1L(19),FERMT3(19),PLEK(18)
## c1_3 CD2(33),CD3E(32),GBP5(20),ITK(19),SLAMF6(18)
## c1_4 MS4A1(12)
## c1_5 IFIT3(19)
## c1_6 CD86(14)
## c1_7 CD53(32),SASH3(27),PLEK(17),NCKAP1L(17),CD4(16)
## c1_10 CD2(32),CD3E(29)
## c1_12 SLAMF6(14)
## c1_13 PSMB9(9)
## c1_19 SASH3(19),PTPN7(12)
## c1_21 CD2(32),CD3E(29)
## c1_23 SLAMF6(10)
## module.scale module.pvalue
## c1_2 S4 0.00
## c1_3 <NA> 0.00
## c1_4 S4 0.00
## c1_5 S4 0.00
## c1_6 S3 0.00
## c1_7 <NA> 0.00
## c1_10 <NA> 0.00
## c1_12 S4 0.00
## c1_13 S4 0.00
## c1_19 S3 0.01
## c1_21 S4 0.00
## c1_23 S3 0.00
You can generate refined module network plots:
pnet.obj <- plot_module(output.summary = summary.output,PFN = g,subset.module = "c1_3",
layout = "kamada.kawai",label.hubs.only = TRUE,
gene.set = NULL,color.code = "grey",
output.plot = FALSE,out.dir = "modulePlot",col.names = c("magenta","green","cyan"),label.scaleFactor = 20,
hubLabel.col = "black",hubLabel.sizeProp = 1,show.topn.hubs = Inf,show.legend = TRUE)
## Processing module:c1_3
## - # of genes: 121
## - # of hubs: 5
## - generating module subnetwork figure...
#X11();
print(pnet.obj[[1]])
module.table <- summary.output$module.table
colnames(module.table)[1] <- "id" # first column of module table must be labelled as "id".
hierarchy.obj <- plot_module_hierarchy(module.table = module.table,label.scaleFactor = 0.15,
arrow.size = 0.03,node.label.color = "blue")
#X11();
print(hierarchy.obj[[1]])