This vignette shows how to use SignacX with Seurat and MASC. There are three parts: Seurat, SignacX and then MASC. We use an example data set of kidney cells from AMP from this publication.
Read the CEL-seq2 data.
<- function(counts.file, meta.file) {
ReadCelseq = suppressWarnings(readr::read_tsv(counts.file))
E <- E$gene
gns = E[, -1]
E = suppressWarnings(readr::read_tsv(meta.file))
M = lapply(unique(M$sample), function(x) {
S = colnames(E) %in% M$cell_name[M$sample == x]
logik ::Matrix(as.matrix(E[, logik]), sparse = TRUE)
Matrix
})names(S) <- unique(M$sample)
= lapply(S, function(x) {
S rownames(x) <- gns
x
})
S
}
= "./SDY997_EXP15176_celseq_matrix_ru10_molecules.tsv.gz"
counts.file = "./SDY997_EXP15176_celseq_meta.tsv"
meta.file
= ReadCelseq(counts.file = counts.file, meta.file = meta.file)
E = suppressWarnings(readr::read_tsv(meta.file)) M
Merge into a single matrix
# keep only kidney cells
= lapply(E, function(x) {
E grepl("K", colnames(x))]
x[,
})
# remove any sample with no cells
= E[sapply(E, ncol) > 0]
E
# merge into a single matrix
= do.call(cbind, E) E
Start with the standard pre-processing steps for a Seurat object.
library(Seurat)
Create a Seurat object, and then perform SCTransform normalization. Note:
# load data
<- CreateSeuratObject(counts = E, project = "celseq")
kidney
# run sctransform
<- SCTransform(kidney) kidney
Perform dimensionality reduction by PCA and UMAP embedding. Note:
# These are now standard steps in the Seurat workflow for visualization and clustering
<- RunPCA(kidney, verbose = FALSE)
kidney <- RunUMAP(kidney, dims = 1:30, verbose = FALSE)
kidney <- FindNeighbors(kidney, dims = 1:30, verbose = FALSE) kidney
First, make sure that you have the SignacX package installed.
install.packages("SignacX")
Generate cellular phenotype labels for the Seurat object. Note:
# Run Signac
library(SignacX)
<- Signac(kidney, num.cores = 4)
labels = GenerateLabels(labels, E = kidney) celltypes
Now we can visualize the cell type classifications at many different levels:
<- AddMetaData(kidney, metadata = celltypes$Immune, col.name = "immmune")
kidney <- SetIdent(kidney, value = "immmune")
kidney png(filename = "fls/plot1_amp.png")
DimPlot(kidney)
dev.off()
<- AddMetaData(kidney, metadata = celltypes$CellTypes, col.name = "celltypes")
kidney <- SetIdent(kidney, value = "celltypes")
kidney png(filename = "fls/plot2_amp.png")
DimPlot(kidney)
dev.off()
<- AddMetaData(kidney, metadata = celltypes$CellStates, col.name = "cellstates")
kidney <- SetIdent(kidney, value = "cellstates")
kidney png(filename = "fls/plot3_amp.png")
DimPlot(kidney)
dev.off()
Identify immune marker genes (IMAGES)
# Downsample just the immune cells
<- kidney[, !celltypes$CellStates %in% c("NonImmune", "Fibroblasts", "Unclassified",
kidney.small "Endothelial", "Epithelial")]
# Find protein markers for all clusters, and draw a heatmap
<- FindAllMarkers(kidney.small, only.pos = TRUE, verbose = F, logfc.threshold = 1)
markers require(dplyr)
<- markers %>% group_by(cluster) %>% top_n(n = 5, wt = avg_logFC)
top5 png(filename = "fls/plot4_amp.png", width = 640, height = 640)
DoHeatmap(kidney.small, features = unique(top5$gene), angle = 90)
dev.off()
= M[match(colnames(kidney), M$cell_name), ]
Meta_mapped $CellStates = celltypes$CellStates
Meta_mapped$disease = factor(Meta_mapped$disease)
Meta_mapped= MASC(dataset = Meta_mapped, cluster = Meta_mapped$CellStates, contrast = "disease", random_effects = c("plate",
Q "lane", "sample"))
MASC results reveals that fibroblasts, plasma cells and B memory cells are enriched (p value < 0.05) for disease.
cluster | size | model.pvalue | diseaseSLE.OR | diseaseSLE.OR.95pct.ci.lower | diseaseSLE.OR.95pct.ci.upper |
---|---|---|---|---|---|
B.memory | 391 | 0.0197539 | 3.041576e+00 | 1.1841285 | 7.812654e+00 |
B.naive | 144 | 0.4652596 | 1.548526e+00 | 0.4932181 | 4.861810e+00 |
DC | 160 | 0.0536208 | 2.346694e+00 | 1.0278135 | 5.357951e+00 |
Endothelial | 30 | 0.2290332 | 1.183979e+01 | 0.1021348 | 1.372508e+03 |
Epithelial | 89 | 0.5615659 | 6.424482e-01 | 0.1488724 | 2.772440e+00 |
Fibroblasts | 360 | 0.0174935 | 3.926459e+00 | 2.5047560 | 6.155124e+00 |
Macrophages | 356 | 0.1061625 | 4.700437e-01 | 0.1959415 | 1.127587e+00 |
Mon.Classical | 247 | 0.6346884 | 8.482458e-01 | 0.4416789 | 1.629059e+00 |
Mon.NonClassical | 658 | 0.7360915 | 8.693916e-01 | 0.3903675 | 1.936231e+00 |
Neutrophils | 11 | 0.3806773 | 1.800080e-02 | 0.0000020 | 1.590775e+02 |
NK | 562 | 0.2814722 | 6.046977e-01 | 0.2470403 | 1.480160e+00 |
NonImmune | 112 | 0.1608263 | 7.817140e-02 | 0.0021302 | 2.868614e+00 |
Plasma.cells | 93 | 0.0055213 | 5.387766e+06 | 0.0000000 | 1.579379e+25 |
T.CD4.memory | 283 | 0.5351536 | 6.933100e-01 | 0.2284183 | 2.104379e+00 |
T.CD4.naive | 459 | 0.4627315 | 7.488830e-01 | 0.3546322 | 1.581429e+00 |
T.CD8.cm | 482 | 0.1776758 | 1.917053e+00 | 0.7686826 | 4.781025e+00 |
T.CD8.em | 1196 | 0.7985566 | 1.107881e+00 | 0.5097435 | 2.407877e+00 |
T.CD8.naive | 39 | 0.8117720 | 1.132623e+00 | 0.4022482 | 3.189161e+00 |
T.regs | 84 | 0.6186262 | 1.571148e+00 | 0.2607223 | 9.467957e+00 |
Unclassified | 717 | 0.8900750 | 9.198574e-01 | 0.2840429 | 2.978908e+00 |
Save results
saveRDS(kidney, file = "fls/amp_kidney_signac.rds")
saveRDS(Q, file = "fls/amp_kidney_MASC_result.rds")
saveRDS(celltypes, file = "fls/amp_kidney_celltypes.rds")
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.5 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] compiler_4.0.3 magrittr_2.0.1 formatR_1.7 htmltools_0.5.1.1
## [5] tools_4.0.3 yaml_2.2.1 stringi_1.5.3 rmarkdown_2.6
## [9] highr_0.8 knitr_1.30 stringr_1.4.0 digest_0.6.27
## [13] xfun_0.20 rlang_0.4.10 evaluate_0.14