library(iCARH)
## Loading required package: rstan
## Loading required package: StanHeaders
## Loading required package: ggplot2
## rstan (Version 2.19.3, GitRev: 2e1f913d3ca3)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
## Loading required package: MASS
## Loading required package: glue
library(abind)
# Dimensions of the simulated study.
Tp <- 4L  # timepoints
N <- 10L  # number of samples
J <- 14L  # number of metabolites
K <- 2L   # number of bacteria species
P <- 8L   # number of pathways

# Fix the RNG state so the random draws that follow are reproducible.
set.seed(12473)
For real data, build pathway matrices using iCARH.getPathwaysMat. Elements of the KEGG id list may contain multiple KEGG ids per metabolite. If a KEGG id is unknown, use "Unk[number]".
# One list element per metabolite; an element is a character vector when a
# metabolite maps to several KEGG ids.
keggid <- list(
  "Unk1",
  "C03299",
  "Unk2",
  "Unk3",
  c("C08363", "C00712") # allowing multiple ids per metabolite
)
# Build the pathway matrices for these ids.
# NOTE(review): "rno" is presumably the KEGG organism code -- confirm.
pathways <- iCARH.getPathwaysMat(keggid, "rno")
To simulate data, use iCARH.simulate. Use path.names to manually choose pathways, or simply specify the expected proportion of metabolites per pathway via path.probs.
# Example of manually picked pathways
# path.names = c("path:map00564","path:map00590","path:map00061","path:map00591",
# "path:map00592","path:map00600","path:map01040","path:map00563")
# Specify expected proportion of metabolites per pathway
path.probs <- 0.8

# Simulate a data set with the dimensions defined above. The proportion is
# passed through the path.probs variable (rather than a repeated literal) so
# that changing it in one place actually changes the simulation.
data.sim <- iCARH.simulate(
  Tp, N, J, P, K,
  path.probs = path.probs,
  Zgroupeff = c(0, 4),
  beta.val = c(1, -1, 0.5, -0.5)
)

# Unpack the simulated components used by the rest of the script.
XX <- data.sim$XX
Y <- data.sim$Y
Z <- data.sim$Z
pathways <- data.sim$pathways

XX[2, 2, 2] <- NA # missing value example
Check for inconsistencies between the covariance and design matrices
# Map every pathway matrix x to 1 / (x + 1) and zero its diagonal.
pathways.bin <- lapply(pathways, function(x) {
  y <- 1 / (x + 1)
  diag(y) <- 0
  y
})
# Stack the per-pathway matrices along a third dimension and sum over that
# dimension, leaving a single matrix.
adjmat <- rowSums(abind::abind(pathways.bin, along = 3), dims = 2)
cor.thresh <- 0.7
# check number of metabolites in same pathway but not correlated
# Correlations use pairwise-complete observations, and na.rm = TRUE guards the
# sum, so a missing value in XX (such as the one injected at XX[2, 2, 2]) no
# longer turns the whole count for that timepoint into NA.
for (i in seq_len(Tp)) {
  print(sum(
    abs(cor(XX[i, , ], use = "pairwise.complete.obs")[which(adjmat > 0)]) <
      cor.thresh,
    na.rm = TRUE
  ))
}
# Output recorded from the original run, where plain cor() propagated the
# missing value and timepoint 2 printed NA; with the pairwise-complete
# correlations above that line now prints a count instead.
## [1] 50
## [1] NA
## [1] 44
## [1] 66
Run iCARH model.
# Let rstan write compiled Stan programs to disk so unchanged programs are not
# recompiled, and set the mc.cores option to 2.
rstan::rstan_options(auto_write = TRUE)
options(mc.cores = 2)

# For testing, does not converge
fit.sim <- iCARH.model(
  XX, Y, Z,
  groups = rep(c(0, 1), each = 5),
  pathways = pathways,
  control = list(max_treedepth = 8),
  iter = 4,
  chains = 1
)
## [1] "Stan warning: simpleWarning in system2(file.path(R.home(component = \"bin\"), \"R\"), args = paste(\"CMD config\", : running command ''/Library/Frameworks/R.framework/Resources/bin/R' CMD config CXX14 2>/dev/null' had status 69\n"
# Not run
# fit.sim = iCARH.model(XX, Y, Z, pathways, control = list(adapt_delta = 0.99, max_treedepth=10),
# iter = 2000, chains = 2, pars=c("YY","Xmis","Ymis"), include=F)
Check convergence
# Only run the diagnostic when the fit object carries an `icarh` element.
if (!is.null(fit.sim$icarh)) {
  rhats <- iCARH.checkRhats(fit.sim)
}
Processing results. Bacteria effects.
# Skipped when the fit object has no `icarh` element.
if (!is.null(fit.sim$icarh)) {
  gplot <- iCARH.plotBeta(fit.sim)
  gplot # last value of the chunk, so it is auto-printed at top level
}
Treatment effects
# Skipped when the fit object has no `icarh` element.
if (!is.null(fit.sim$icarh)) {
  gplot <- iCARH.plotTreatmentEffect(fit.sim)
  gplot # last value of the chunk, so it is auto-printed at top level
}
Pathway analysis
# Skipped when the fit object has no `icarh` element.
if (!is.null(fit.sim$icarh)) {
  # The names of the simulated pathway list are passed as path.names.
  gplot <- iCARH.plotPathwayPerturbation(
    fit.sim,
    path.names = names(data.sim$pathways)
  )
  gplot # last value of the chunk, so it is auto-printed at top level
}
Normality assumptions
# Skipped when the fit object has no `icarh` element.
if (!is.null(fit.sim$icarh)) {
  # Lay out the graphics device as one row of two panels before plotting.
  # NOTE(review): the previous par() settings are not restored afterwards.
  par(mfrow = c(1, 2))
  iCARH.checkNormality(fit.sim)
}
WAIC
# Skipped when the fit object has no `icarh` element.
if (!is.null(fit.sim$icarh)) {
  waic <- iCARH.waic(fit.sim)
  waic # last value of the chunk, so it is auto-printed at top level
}
Posterior predictive checks. MAD: mean absolute deviation between covariance matrices
# Skipped when the fit object has no `icarh` element.
if (!is.null(fit.sim$icarh)) {
  mad <- iCARH.mad(fit.sim)
  # Summarise the returned values by their default quantiles.
  quantile(mad)
}
Get imputed values for the missing data
# Skipped when the fit object has no `icarh` element.
if (!is.null(fit.sim$icarh)) {
  imp <- iCARH.getDataImputation(fit.sim)
}