Introduction

We want to illustrate the RPointCloud package (Version 0.8.0) with a single-cell mass cytometry data set. Not surprisingly, we start by loading the package.

library(RPointCloud)

We also load a lot of useful packages (some of which will eventually get incorporated into the package requirements).

library(TDA)

## 
## Attaching package: 'TDA'

## The following object is masked from 'package:cluster':
## 
##     silhouette

library(Polychrome)
data(Dark24)
data(Light24)
library(Mercator)
library(igraph)
library(ClassDiscovery)
library(PCDimension)
library("ape")

Now we fetch the sample data set that is included with the package.

data(treg)
ls()

##  [1] "AML10.node287"      "AML10.node287.rips" "Arip"               "Dark24"            
##  [5] "G"                  "G2"                 "H"                  "K"                 
##  [9] "L"                  "Light24"            "Lt"                 "P"                 
## [13] "U"                  "V"                  "W"                  "X"                 
## [17] "Y"                  "amldist"            "angle.df"           "annote"            
## [21] "clinical"           "colorScheme"        "colset"             "cyc"               
## [25] "cyc1"               "cyc2"               "cyc3"               "cycles"            
## [29] "d0"                 "d1"                 "d2"                 "daisydist"         
## [33] "diag"               "e"                  "eb"                 "edges"             
## [37] "ef"                 "featCD99"           "featKi67"           "featMU"            
## [41] "featRai"            "keg"                "mds"                "mercury"           
## [45] "mu"                 "nu"                 "nzero"              "ob"                
## [49] "oopt"               "pal"                "persistence"        "poof"              
## [53] "rate"               "rip"                "ripdiag"            "shape"             
## [57] "sigma"              "support"            "tmat"               "treg"              
## [61] "vd"                 "xx"                 "yy"

TDA Built-in Visualizations of the Rips Diagram

Here are some plots of the TDA results using tools from the original package. (I am not sure what any of these are really good for.)

diag <- rip[["diagram"]]
opar <- par(mfrow = c(1,2))
plot(diag, barcode = TRUE, main = "Barcode")
plot(diag, main = "Rips Diagram")

Figure 1 : The Rips barcode diagram from TDA.

par(opar)
rm(opar)

L <- TDA::landscape(diag, KK = 1)
S <- TDA::silhouette(diag)
crt <- TDA::clusterTree(as.matrix(tmat), k = 5, dist = "arbitrary")

opar <- par(mfrow = c(1, 3))
plot(L, type = "l", main = "Landscape")
plot(S, type = "l", main = "Silhouette")
plot(crt, type = "lambda",main = "Lambda Cluster Tree")

Figure 2 : Landscape, silhouette, and lambda-cluster plots of the Rips diagram, from TDA.

par(opar)
rm(L, S, opar)

Mercator Visualizations of the Underlying Data and Distance Matrix

M <- Mercator(tmat, metric ="pearson", method = "mds", K = 8)
M <- addVisualization(M, "hclust")
M <- addVisualization(M, "tsne")
M <- addVisualization(M, "umap")
M <- addVisualization(M, "som")
M@palette <- Light24
set.seed(72345)
clue <- kmeans(t(treg), centers = 8, iter.max = 100, nstart = 20)
M <- setClusters(M, clue$cluster)

opar <- par(mfrow = c(3,2), cex = 1.1)
plot(M, view = "hclust")
plot(M, view = "mds", main = "Mult-Dimensional Scaling")
plot(M, view = "tsne", main = "t-SNE")
plot(M, view = "umap", main = "UMAP")
barplot(M, main = "Silhouette Width")
plot(M, view = "som", main = "Self-Organizing Maps")

Figure 3 : Mercator Visualizations of the distance matrix.

par(opar)
rm(opar)

Dimension Zero

Here is a picture of the “zero-cycle” data, which can also be used ultimately to cluster the points (where each point is a patient). The connected lines are similar to a single-linkage clustering structure, showing when individual points are merged together as the TDA parameter increases.

nzero <- sum(diag[, "dimension"] == 0)
cycles <- rip[["cycleLocation"]][2:nzero]
W <- M@view[["umap"]]$layout
plot(W, main = "Connected Zero Cycles")
for (cyc in cycles) {
  points(W[cyc[1], , drop = FALSE], pch = 16,col = "red")
  X <- c(W[cyc[1], 1], W[cyc[2],1])
  Y <- c(W[cyc[1], 2], W[cyc[2],2])
  lines(X, Y)
}

Figure 4 : Hierarchical connections between zero cycles.

Using iGraph

We can convert the 0-dimensional cycle structure into a dendrogram, by first passing them through the igraph package. We start by putting all the zero-cycle data together, which can be viewed as an “edge-list” from the igraph perspective.

edges <- t(do.call(cbind, cycles)) # this creates an "edgelist"
G <- graph_from_edgelist(edges)
G <- set_vertex_attr(G, "label", value = attr(tmat, "Labels"))

Note that we attached the sample names to the graph, obtaining them from the daisy distance matrix. Now we use two different algorithms to decide how to layout the graph.

set.seed(2734)
Lt <- layout_as_tree(G)
L <- layout_with_fr(G)

opar <- par(mfrow = c(1,2), mai = c(0.01, 0.01, 1.02, 0.01))
plot(G, layout = Lt, main = "As Tree")
plot(G, layout = L, main = "Fruchterman-Reingold")

Figure 5 : Two igraph depictions of the zero cycle structure.

par(opar)

Note that the Fruchterman-Reingold layout gives the most informative structure.

Community Structure

There are a variety of community-finding algorithms that we can apply. (Communities in graph theory are similar to clusters in other machine learning areas of study.) “Edge-betweenness” seems to work best.

keg <- cluster_edge_betweenness(G) # 19
table(membership(keg))

## 
##  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 
## 18 13  5 31 27 26  8 16 11  9 14  8  6  8 10  5 16 10 14

pal <- Light24[membership(keg)]

The first line in the next code chunk shows that we did actually produce a tree. We explore three different ways ro visualize it

is.hierarchical(keg)

## [1] TRUE

H <- as.hclust(keg)
H$labels <- vertex_attr(G, "names")
K <-  12
colset <- Light24[cutree(H, k=K)]
G2 <- set_vertex_attr(G, "color", value = colset)
e <- 0.01
opar <- par(mai = c(e, e, e, e))
plot(G2, layout = L)

Figure 6 : Community structure, simplified.

par(opar)

P <- as.phylo(H)
opar <- par(mai = c(0.01, 0.01, 1.0, 0.01))
plot(P, type = "u", tip.color = colset, cex = 0.8, main = "Ape/Cladogram")

Figure 8 : Cladogram realization, from the ape package.

par(opar)
rm(opar)

Visualizing Features

In any of the “scatter plot views” (e.g., MDS, UMAP, t-SNE) from Mercator, we may want to overlay different colors to represent different clinical features.

U <- M@view[["mds"]]
V <- M@view[["tsne"]]$Y
W <- M@view[["umap"]]$layout

FOXP3 <- Feature(treg["FOXP3",], "FOXP3", c("pink", "skyblue"), c("Low", "High"))
CTLA4 <- Feature(treg["CTLA4", ], "CTLA4", c("green", "magenta"), c("Low", "High"))
opar <- par(mfrow = c(1,2))
plot(W, main = "UMAP; FOXP3", xlab = "U1", ylab = "U2")
points(FOXP3, W, pch = 16, cex = 1.4)
plot(W, main = "UMAP; CTLA4", xlab = "U1", ylab = "U2")
points(CTLA4, W, pch = 16, cex = 1.4)

Figure 10 : UMAP visualizations with clinical features.

par(opar)
rm(opar)

options(oopt)
#rm(list = ls())

RPointCloud: Regulatory T Cells