Protein complex-based group regression models for risk protein complex identification.
Install the released version from CRAN with:
install.packages("PCLassoReg")
To install the latest development version from GitHub:
devtools::install_github("weiliu123/PCLassoReg")
The package implements protein complex-based group regression models (PCLasso and PCLasso2) for risk protein complex identification. PCLasso is a prognostic model that identifies risk protein complexes associated with survival. PCLasso2 is a classification model that identifies risk protein complexes associated with classes.
library(PCLassoReg)

#################### PCLasso ####################
# Example: fit the PCLasso prognostic model and extract the selected
# protein complexes / proteins.

# load data
data(survivalData)
data(PCGroups)

x <- survivalData$Exp
y <- survivalData$survData

# get human protein complexes
PC.Human <- getPCGroups(Groups = PCGroups, Organism = "Human",
                        Type = "EntrezID")

# Fix the seed so the train/test split is reproducible; two thirds of the
# rows of x are sampled for training and the remaining rows are held out.
set.seed(20150122)
idx.train <- sample(nrow(x), round(nrow(x) * 2 / 3))
x.train <- x[idx.train, ]
y.train <- y[idx.train, ]
x.test <- x[-idx.train, ]
y.test <- y[-idx.train, ]

# fit cv.PCLasso model
cv.fit1 <- cv.PCLasso(x = x.train, y = y.train, group = PC.Human, nfolds = 5)

# predict risk scores of samples in x.test
s <- predict(object = cv.fit1, x = x.test, type = "link",
             lambda = cv.fit1$cv.fit$lambda.min)

# Nonzero coefficients/risk protein complexes
sel.groups <- predict(object = cv.fit1, type = "groups",
                      lambda = cv.fit1$cv.fit$lambda.min)

# Nonzero coefficients/risk proteins
sel.vars.unique <- predict(object = cv.fit1, type = "vars.unique",
                           lambda = cv.fit1$cv.fit$lambda.min)
#################### PCLasso2 ####################
# Example: fit the PCLasso2 classification model and extract the selected
# protein complexes / proteins.

# load data
data(classData)
data(PCGroups)

x <- classData$Exp
y <- classData$Label

# get human protein complexes
PC.Human <- getPCGroups(Groups = PCGroups, Organism = "Human",
                        Type = "GeneSymbol")

# Fix the seed so the train/test split is reproducible; two thirds of the
# rows of x are sampled for training and the remaining rows are held out.
# y is indexed without a column dimension here.
set.seed(20150122)
idx.train <- sample(nrow(x), round(nrow(x) * 2 / 3))
x.train <- x[idx.train, ]
y.train <- y[idx.train]
x.test <- x[-idx.train, ]
y.test <- y[-idx.train]

# fit model
cv.fit1 <- cv.PCLasso2(x = x.train, y = y.train, group = PC.Human,
                       penalty = "grLasso", family = "binomial", nfolds = 5)

# predict classes of samples in x.test
s <- predict(object = cv.fit1, x = x.test, type = "class",
             lambda = cv.fit1$cv.fit$lambda.min)

# Nonzero coefficients/risk protein complexes
sel.groups <- predict(object = cv.fit1, type = "groups",
                      lambda = cv.fit1$cv.fit$lambda.min)

# Nonzero coefficients/risk proteins
sel.vars.unique <- predict(object = cv.fit1, type = "vars.unique",
                           lambda = cv.fit1$cv.fit$lambda.min)
PCLasso2: a protein complex-based, group Lasso-logistic model for risk protein complex discovery. To be published.
PCLasso: a protein complex-based, group lasso-Cox model for accurate prognosis and risk protein complex discovery. Brief Bioinform, 2021.
Park, H., Niida, A., Miyano, S. and Imoto, S. (2015) Sparse overlapping group lasso for integrative multi-omics analysis. Journal of computational biology: a journal of computational molecular cell biology, 22, 73-84.