This is a MSC workflow vignette using 8k PBMC single-cell RNA-seq data. The raw count data can be downloaded NF from Data Portal and on Synpase with DOI, https://doi.org/10.7303/syn52966803, under ‘8k_PBMC_10x_Counts’ folder. The downloaded data should be placed under folder named “Data”, rooted on the working directory of choice. To save time, the processed data can also be downloaded from the same DOI link in ‘8k_PBMC_Processed’ folder, and should be placed under the working directory. This will bypass from Section 1 to Section 2.
Overall, the vignette is organized as follows:
Section 1. Pre-processing;
Section 2. Cell type annotations using SingleR;
Section 3. MSC workflow: Pearson’s correlation;
Section 4. MSC workflow: Eulidean’s correlation;
Section 5. Hierarchical visualization of
markers;
Section 6. Visualize MSC results using various
tools;
Section 7. Hierarchical cluster respresentation with
piecharts.
rm(list = ls())
library(SingleCellExperiment)
library(ALRA)
library(doParallel)
library(igraph)
library(MEGENA)
library(Matrix)
library(ggraph)
library(scater)
library(scran)
library(MSC)
#### Set up folders
root.dir <- "/home/won-min/Documents/R_packages/Single_Cell_Clustering/pbmc_8k";setwd(root.dir)
out.dir = "Results";dir.create(out.dir) # set up output folders
## Warning in dir.create(out.dir): 'Results' already exists
#### set-up parameters
mt.val <- 20 # mitochondrial proportion %
min.count = 5 # minimum sum count to have a gene as valid across the whole cells
min.gene = 100 # minumum number of genes to be expressed per cell
min.cell = 10 # minimum number of cells showing expression for a gene to be valid
n.cores = 4 # number of cores to parallelize
min.cells = 10; # minimum cell cluster size
bio.cutoff = 0; # threshold to filter significant genes with biological variances as modeled from mean-variance fitting
pval.cutoff = 0.05; # p-value threshold to get significant overall gene variances.
######### Data pre-processing: Create Single-Cell Experiment object
if (TRUE)
{
#### load matrix
sc.data = load_mtx(mtx.folder = NULL,
mat.path = "Data/matrix.mtx",
feat.path = "Data/genes.tsv",
bar.path = "Data/barcodes.tsv",
is.gzip = FALSE)
colnames(sc.data$gene.features)[1:2] = c("gene.name","gene.symbol")
sc.data$gene.matrix = as(sc.data$gene.matrix,"CsparseMatrix")
# make SingleCellExperiment object
sce = SingleCellExperiment(assays = list(counts = sc.data$gene.matrix))
# update rowdata for main data
rowData(sce) = sc.data$gene.features[,1:2]
is.mito = grepl("^MT-|^Mt-|^mt-",rowData(sce)$gene.symbol)
rowData(sce)$is.mito = is.mito
# update rownames wiwth gene symbols
rownames(sce) = paste0(sc.data$gene.features[[2]],"|",sc.data$gene.features[[1]])
sce <- addPerCellQC(sce, subsets=list(Mito=which(rowData(sce)$is.mito)))
# Perform QC on the single-cell experiment object through streamlined "process_data()" function
outfile = paste0(out.dir,"/SCE.standardized_QC.RDS")
qc.out = process_data(sce,do.impute = TRUE)
rm(sce)
gc()
# calculate dimension reduction on ALRA-imputed data: PCA, UMAP and tSNE
reducedDim(qc.out$sce,"PCA.imputed") = calculatePCA(x = assay(qc.out$sce,"ALRA.logcounts"),subset_row = rownames(subset(qc.out$gene.data.alra,p.value < 0.05)))
reducedDim(qc.out$sce,"UMAP.imputed") = calculateUMAP(x = qc.out$sce,dimred = "PCA.imputed")
reducedDim(qc.out$sce,"tSNE.imputed") = calculateTSNE(x = qc.out$sce,dimred = "PCA.imputed")
plotReducedDim(qc.out$sce,dimred = "tSNE.imputed")
# save QC'ed data
saveRDS(qc.out,file = paste0(out.dir,"/SCE.standardized_QC.RDS"))
}
## - number of genes filtered:4950
## Warning in asMethod(object): sparse->dense coercion: allocating vector of size
## 1.8 GiB
## Read matrix with 8364 cells and 29040 genes
## Chose k=32
## Getting nonzeros
## Randomized SVD
## Find the 0.001000 quantile of each gene
## Sweep
## Scaling all except for 3835 columns
## 0.01% of the values became negative in the scaling process and were set to zero
## The matrix went from 4.86% nonzero to 32.48% nonzero
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
We used SingleR, as an independent cell type inference method. SingleR works with another R package, celldex, which holds comprehensive reference data sets for generic PBMC cell types.
library(SingleR)
library(celldex)
hpca.se <- MonacoImmuneData()
m = assay(qc.out$sce,"ALRA.logcounts")
rownames(m) = gsub("\\|(.*)$","",rownames(m))
gene.common = intersect(rownames(hpca.se),rownames(m))
hpca.se = hpca.se[gene.common,]
m = m[gene.common,]
# fine subtype
pred.ct <- SingleR(test = m, ref = hpca.se, assay.type.test=1,
labels = hpca.se$label.fine)
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
colData(qc.out$sce)$inferred.cell.type.fine = pred.ct$pruned.labels[match(colnames(qc.out$sce),rownames(pred.ct))]
# fine subtype
pred.ct <- SingleR(test = m, ref = hpca.se, assay.type.test=1,
labels = hpca.se$label.main)
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
colData(qc.out$sce)$inferred.cell.type.broad = pred.ct$pruned.labels[match(colnames(qc.out$sce),rownames(pred.ct))]
plotReducedDim(qc.out$sce,dimred = "tSNE.imputed",colour_by = "inferred.cell.type.broad")
rm(hpca.se)
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 9265737 494.9 17784992 949.9 12358022 660.0
## Vcells 476020955 3631.8 941622255 7184.1 2211716566 16874.1
# Conversion to Seurat object:Not necessary to run.
if (FALSE)
{
library(Seurat)
seu = CreateSeuratObject(counts = counts(qc.out$sce),project = "pbmc8k",meta.data = as.data.frame(colData(qc.out$sce)))
seu[["RNA"]]@data = logcounts(qc.out$sce)
seu[["RNA"]]@scale.data = assay(qc.out$sce,"ALRA.logcounts")
# Create DimReduc and add to object
xy = reducedDim(qc.out$sce,"PCA.imputed")
seu[['PCA']] <- CreateDimReducObject(embeddings = xy, key = "PCA_", global = T, assay = "RNA")
xy = reducedDim(qc.out$sce,"UMAP.imputed")
seu[['UMAP']] <- CreateDimReducObject(embeddings = xy, key = "UMAP_", global = T, assay = "RNA")
xy = reducedDim(qc.out$sce,"tSNE.imputed")
seu[['tSNE']] <- CreateDimReducObject(embeddings = xy, key = "tSNE_", global = T, assay = "RNA")
rm(xy)
# save converted seurat object
saveRDS(seu,file = paste0(out.dir,"/SEU.celltype_inferred.RDS"))
}
MSC requires the log-normalized gene expression matrix across the variable genes as the primary input. For correlations, we also provide binarized count matrix where count.matrix[i,j] = 1 if there is a non-zero count for ith gene in jth cell, otherwise count.matrix[i,j] = 0. This binarized matrix will further serve to model the correlation as a function of shared gene expressions between two cells. This is because the similarity between two cells is often dictated by the number of shared expressed genes. This workflow can be extended to any other similarity metrics.
min.size = min.cell
# Register cores
if (getDoParWorkers() == 1 & n.cores > 1 & !(n.cores == getDoParWorkers()))
{
cl <- parallel::makeCluster(n.cores)
registerDoParallel(cl)
}
# Use ALRA imputed data for correlation computation
gene.data = qc.out$gene.data.alra # variable gene stats
dat = subset(gene.data,bio > 0 & p.value < 0.05) # subset for variable genes
bcnt.alra = counts(qc.out$sce)[rownames(dat),] # binarized count matrix for variable genes
m.alra = as.matrix(assay(qc.out$sce,"ALRA.logcounts")[rownames(dat),]) # log-normalized matrix for variable genes
bcnt.alra[bcnt.alra > 1E-320] = 1
### Calculate LEN: Use pairwise Pearson's correlation for cell cell similarity. Note that dist.func = function(a, b) cor(a,b,method ="pearson") is used to calculate pairwise Pearson's correlation.
len.out = generate_cell_network.wt.loess(
mat = m.alra,
bcnt = bcnt.alra,
dist.func = function(a, b) cor(a,b,method ="pearson"),
is.decreasing = TRUE,
n.cores = n.cores
);
## - #. jobs per node:2091
## Warning in e$fun(obj, substitute(ex), parent.frame(), e$data): already
## exporting variable(s): ng.per.cell, min.k, max.k
## - runtime to calculate LEN per node:
## user system elapsed
## 0.293 0.032 83.761
## - Check duplicates...
## processed:1000/50007=2%
## processed:2000/50007=4%
## processed:3000/50007=6%
## processed:4000/50007=8%
## processed:5000/50007=10%
## processed:6000/50007=12%
## processed:7000/50007=14%
## processed:8000/50007=16%
## processed:9000/50007=18%
## processed:10000/50007=20%
## processed:11000/50007=22%
## processed:12000/50007=24%
## processed:13000/50007=26%
## processed:14000/50007=28%
## processed:15000/50007=30%
## processed:16000/50007=32%
## processed:17000/50007=34%
## processed:18000/50007=36%
## processed:19000/50007=38%
## processed:20000/50007=40%
## processed:21000/50007=42%
## processed:22000/50007=44%
## processed:23000/50007=46%
## processed:24000/50007=48%
## processed:25000/50007=50%
## processed:26000/50007=52%
## processed:27000/50007=54%
## processed:28000/50007=56%
## processed:29000/50007=58%
## processed:30000/50007=60%
## processed:31000/50007=62%
## processed:32000/50007=64%
## processed:33000/50007=66%
## processed:34000/50007=68%
## processed:35000/50007=70%
## processed:36000/50007=72%
## processed:37000/50007=74%
## processed:38000/50007=76%
## processed:39000/50007=78%
## processed:40000/50007=80%
## processed:41000/50007=82%
## processed:42000/50007=84%
## processed:43000/50007=86%
## processed:44000/50007=88%
## processed:45000/50007=90%
## processed:46000/50007=92%
## processed:47000/50007=94%
## processed:48000/50007=96%
## processed:49000/50007=98%
## processed:50000/50007=100%
## - Calculate mutual neighbor ratios...
## processed:1000/50007=2%
## processed:2000/50007=4%
## processed:3000/50007=6%
## processed:4000/50007=8%
## processed:5000/50007=10%
## processed:6000/50007=12%
## processed:7000/50007=14%
## processed:8000/50007=16%
## processed:9000/50007=18%
## processed:10000/50007=20%
## processed:11000/50007=22%
## processed:12000/50007=24%
## processed:13000/50007=26%
## processed:14000/50007=28%
## processed:15000/50007=30%
## processed:16000/50007=32%
## processed:17000/50007=34%
## processed:18000/50007=36%
## processed:19000/50007=38%
## processed:20000/50007=40%
## processed:21000/50007=42%
## processed:22000/50007=44%
## processed:23000/50007=46%
## processed:24000/50007=48%
## processed:25000/50007=50%
## processed:26000/50007=52%
## processed:27000/50007=54%
## processed:28000/50007=56%
## processed:29000/50007=58%
## processed:30000/50007=60%
## processed:31000/50007=62%
## processed:32000/50007=64%
## processed:33000/50007=66%
## processed:34000/50007=68%
## processed:35000/50007=70%
## processed:36000/50007=72%
## processed:37000/50007=74%
## processed:38000/50007=76%
## processed:39000/50007=78%
## processed:40000/50007=80%
## processed:41000/50007=82%
## processed:42000/50007=84%
## processed:43000/50007=86%
## processed:44000/50007=88%
## processed:45000/50007=90%
## processed:46000/50007=92%
## processed:47000/50007=94%
## processed:48000/50007=96%
## processed:49000/50007=98%
## processed:50000/50007=100%
## - runtime to calculate mutual neighbor ratios:
## user system elapsed
## 58.022 0.260 58.283
## Checking relationship between shared neighbors and correlations...
##
## FALSE TRUE
## 41840 862
## Pruning low betweenness edges...
### Run iterative top-down clustering
d.func = function(x) sqrt(2*(1-x)) # use metric distance between the cells for correlations. The distance metric is used to calculate the cluster compactness
# identify alpha, the scaling parameter to calculate the cluster compactness
cout = components(len.out)
ci = which(cout$csize > log(vcount(len.out)))
glst = lapply(ci,function(n) induced_subgraph(graph = len.out,vids = names(cout$membership)[cout$membership == n]))
alpha.res = sapply(glst,function(gi) identify_scale_parameter(g = gi,nv = 20,nr.max = 3,seed = 1234,d.func = d.func,mode = "diameter"))[1]
## Scale parameter:
## [1] 1.788194
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 9270985 495.2 17784992 949.9 17784992 949.9
## Vcells 488539352 3727.3 941622255 7184.1 2211716566 16874.1
# iterative clustering
iter.res = iterative_clustering(g.in = len.out,min.size = min.size,d.func = d.func,alpha = alpha.res)
## ##### Multiscale clustering iteration.1
## - current number of identified modules:0
## - number of parent modules to split:1
## - parent module names:M0
## ##### Multiscale clustering iteration.2
## - current number of identified modules:2
## - number of parent modules to split:1
## - parent module names:M1
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M3 M4 M5 M6 M7 M8
## 2183 61 384 3248 1131 1124
## - evaluate compactness with alpha = 1.78819411526898
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## ##### Multiscale clustering iteration.3
## - current number of identified modules:8
## - number of parent modules to split:6
## - parent module names:M3,M4,M5,M6,M7,M8
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M9 M10 M11 M12 M13 M14
## 856 476 217 168 226 240
## - evaluate compactness with alpha = 1.78819411526898
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M15 M16 M17
## 35 15 11
## - evaluate compactness with alpha = 1.78819411526898
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M18 M19 M20
## 252 97 35
## - evaluate compactness with alpha = 1.78819411526898
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M21 M22
## 2249 999
## - evaluate compactness with alpha = 1.78819411526898
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M23 M24 M25
## 364 406 361
## - evaluate compactness with alpha = 1.78819411526898
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M26 M27 M28 M29
## 541 226 232 125
## - evaluate compactness with alpha = 1.78819411526898
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## ##### Multiscale clustering iteration.4
## - current number of identified modules:26
## - number of parent modules to split:12
## - parent module names:M9,M10,M13,M18,M19,M21,M23,M25,M26,M27,M28,M29
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M30 M31
## 9 847
## - evaluate compactness with alpha = 1.78819411526898
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M32 M33
## 17 459
## - evaluate compactness with alpha = 1.78819411526898
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M34 M35
## 161 65
## - evaluate compactness with alpha = 1.78819411526898
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M36 M37 M38
## 100 71 81
## - evaluate compactness with alpha = 1.78819411526898
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M39 M40 M41
## 43 51 3
## - evaluate compactness with alpha = 1.78819411526898
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M42 M43 M44
## 895 1096 258
## - evaluate compactness with alpha = 1.78819411526898
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M45
## 364
## - evaluate compactness with alpha = 1.78819411526898
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M45 M46
## 225 136
## - evaluate compactness with alpha = 1.78819411526898
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M47 M48 M49
## 130 229 182
## - evaluate compactness with alpha = 1.78819411526898
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M50 M51
## 173 53
## - evaluate compactness with alpha = 1.78819411526898
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M52 M53
## 66 166
## - evaluate compactness with alpha = 1.78819411526898
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M54 M55
## 60 65
## - evaluate compactness with alpha = 1.78819411526898
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## ##### Multiscale clustering iteration.5
## - current number of identified modules:46
## - number of parent modules to split:9
## - parent module names:M31,M33,M34,M38,M40,M42,M43,M48,M52
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M56 M57
## 278 569
## - evaluate compactness with alpha = 1.78819411526898
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M58 M59
## 180 279
## - evaluate compactness with alpha = 1.78819411526898
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M60 M61
## 77 84
## - evaluate compactness with alpha = 1.78819411526898
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M62 M63 M64
## 30 32 19
## - evaluate compactness with alpha = 1.78819411526898
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M65 M66
## 38 13
## - evaluate compactness with alpha = 1.78819411526898
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M67
## 895
## - evaluate compactness with alpha = 1.78819411526898
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M67 M68
## 1091 5
## - evaluate compactness with alpha = 1.78819411526898
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M69 M70
## 114 115
## - evaluate compactness with alpha = 1.78819411526898
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M71 M72
## 51 15
## - evaluate compactness with alpha = 1.78819411526898
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## ##### Multiscale clustering iteration.6
## - current number of identified modules:56
## - number of parent modules to split:5
## - parent module names:M59,M60,M65,M67,M71
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M73 M74
## 218 61
## - evaluate compactness with alpha = 1.78819411526898
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M75 M76
## 36 41
## - evaluate compactness with alpha = 1.78819411526898
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M77 M78
## 20 18
## - evaluate compactness with alpha = 1.78819411526898
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M79 M80 M81
## 791 291 9
## - evaluate compactness with alpha = 1.78819411526898
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M82 M83
## 33 18
## - evaluate compactness with alpha = 1.78819411526898
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## ##### Multiscale clustering iteration.7
## - current number of identified modules:63
## - number of parent modules to split:3
## - parent module names:M73,M77,M79
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M84 M85
## 114 104
## - evaluate compactness with alpha = 1.78819411526898
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## - clusters:
## M86 M87 M88
## 12 5 3
## - evaluate compactness with alpha = 1.78819411526898
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M89 M90
## 359 432
## - evaluate compactness with alpha = 1.78819411526898
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
# probe cluster hierarchy to identify the main root cluster.
do.probe = TRUE
mo = "M0"
while (do.probe)
{
tbl = subset(iter.res$module.table,parent.cluster == mo & module.size >= min.cells)
if (nrow(tbl) > 1)
{
do.probe = FALSE
}else{
mo = tbl$cluster.name[1]
}
}
iter.res$root.cluster = mo
#saveRDS(iter.res,file = paste0(out.dir,"/MSC_Pearson.RDS"))
iter.res.cor = iter.res;
rm(iter.res)
In contrary to Section 3, we use Euclidean distance in PCA space to perform MSC. Since PCA-based calculation does not directly utilize the expression matrix across all genes, the binarized count matrix is not necessary for down-stream analysis. Again, this workflow can be extended to any other dissimilarity metrics.
min.size = min.cell
# Register cores
if (getDoParWorkers() == 1 & n.cores > 1 & !(n.cores == getDoParWorkers()))
{
cl <- parallel::makeCluster(n.cores)
registerDoParallel(cl)
}
# get PCA as the imput
mat = as.matrix(t(reducedDim(qc.out$sce,"PCA.imputed")))
# Calculate LEN: Use pairwise Euclidean distance in PCA space for cell cell similarity. Note that dist.func = function(x,y) sqrt(sum((x-y)^2)) is used with is.decreasing = FALSE to sort the cell pairs from short distances to longer distances.
len.out = generate_cell_network.wt.loess(
mat,
bcnt = NULL,
dist.func = function(x,y) sqrt(sum((x-y)^2)),
is.decreasing = FALSE,
n.cores = 4
);
## - #. jobs per node:2091
## Warning in e$fun(obj, substitute(ex), parent.frame(), e$data): already
## exporting variable(s): min.k, max.k
## - runtime:
## user system elapsed
## 0.032 0.012 97.321
## - Check duplicates...
## processed:1000/37494=2.67%
## processed:2000/37494=5.33%
## processed:3000/37494=8%
## processed:4000/37494=10.7%
## processed:5000/37494=13.3%
## processed:6000/37494=16%
## processed:7000/37494=18.7%
## processed:8000/37494=21.3%
## processed:9000/37494=24%
## processed:10000/37494=26.7%
## processed:11000/37494=29.3%
## processed:12000/37494=32%
## processed:13000/37494=34.7%
## processed:14000/37494=37.3%
## processed:15000/37494=40%
## processed:16000/37494=42.7%
## processed:17000/37494=45.3%
## processed:18000/37494=48%
## processed:19000/37494=50.7%
## processed:20000/37494=53.3%
## processed:21000/37494=56%
## processed:22000/37494=58.7%
## processed:23000/37494=61.3%
## processed:24000/37494=64%
## processed:25000/37494=66.7%
## processed:26000/37494=69.3%
## processed:27000/37494=72%
## processed:28000/37494=74.7%
## processed:29000/37494=77.3%
## processed:30000/37494=80%
## processed:31000/37494=82.7%
## processed:32000/37494=85.3%
## processed:33000/37494=88%
## processed:34000/37494=90.7%
## processed:35000/37494=93.3%
## processed:36000/37494=96%
## processed:37000/37494=98.7%
## - Calculate mutual neighbor ratios...
## processed:1000/37494=2.67%
## processed:2000/37494=5.33%
## processed:3000/37494=8%
## processed:4000/37494=10.7%
## processed:5000/37494=13.3%
## processed:6000/37494=16%
## processed:7000/37494=18.7%
## processed:8000/37494=21.3%
## processed:9000/37494=24%
## processed:10000/37494=26.7%
## processed:11000/37494=29.3%
## processed:12000/37494=32%
## processed:13000/37494=34.7%
## processed:14000/37494=37.3%
## processed:15000/37494=40%
## processed:16000/37494=42.7%
## processed:17000/37494=45.3%
## processed:18000/37494=48%
## processed:19000/37494=50.7%
## processed:20000/37494=53.3%
## processed:21000/37494=56%
## processed:22000/37494=58.7%
## processed:23000/37494=61.3%
## processed:24000/37494=64%
## processed:25000/37494=66.7%
## processed:26000/37494=69.3%
## processed:27000/37494=72%
## processed:28000/37494=74.7%
## processed:29000/37494=77.3%
## processed:30000/37494=80%
## processed:31000/37494=82.7%
## processed:32000/37494=85.3%
## processed:33000/37494=88%
## processed:34000/37494=90.7%
## processed:35000/37494=93.3%
## processed:36000/37494=96%
## processed:37000/37494=98.7%
## Checking relationship between shared neighbors and correlations...
##
## FALSE TRUE
## 30942 1438
## Pruning low betweenness edges...
# run iterative top-down clusters
d.func = function(x) x # the pairwise distance is passed onto the LEN network by identity function, f(x) = x.
cout = components(len.out)
ci = which(cout$csize > log(vcount(len.out)))
glst = lapply(ci,function(n) induced_subgraph(graph = len.out,vids = names(cout$membership)[cout$membership == n]))
alpha.res = sapply(glst,function(gi) identify_scale_parameter(g = gi,nv = 20,nr.max = 3,seed = 1234,d.func = d.func,mode = "diameter"))[1]
## Scale parameter:
## [1] 2.978133
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 9300347 496.7 17784992 949.9 17784992 949.9
## Vcells 488936144 3730.3 941622255 7184.1 2211716566 16874.1
# run MSC
iter.res = iterative_clustering(g.in = len.out,min.size = min.size,d.func = d.func,alpha = alpha.res)
## ##### Multiscale clustering iteration.1
## - current number of identified modules:0
## - number of parent modules to split:1
## - parent module names:M0
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M1 M2 M3 M4 M5 M6 M7 M8 M9
## 296 1724 1245 1158 286 2021 1135 97 222
## - evaluate compactness with alpha = 2.97813324728707
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## ##### Multiscale clustering iteration.2
## - current number of identified modules:9
## - number of parent modules to split:7
## - parent module names:M1,M2,M3,M5,M6,M7,M9
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M10 M11 M12
## 62 123 111
## - evaluate compactness with alpha = 2.97813324728707
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M13 M14 M15 M16 M17
## 181 776 253 22 492
## - evaluate compactness with alpha = 2.97813324728707
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M18 M19 M20 M21
## 277 676 9 283
## - evaluate compactness with alpha = 2.97813324728707
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M22 M23
## 249 37
## - evaluate compactness with alpha = 2.97813324728707
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M24 M25
## 837 1184
## - evaluate compactness with alpha = 2.97813324728707
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M26 M27 M28 M29
## 240 517 256 122
## - evaluate compactness with alpha = 2.97813324728707
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M30 M31
## 85 137
## - evaluate compactness with alpha = 2.97813324728707
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## ##### Multiscale clustering iteration.3
## - current number of identified modules:29
## - number of parent modules to split:10
## - parent module names:M11,M14,M19,M21,M22,M24,M25,M26,M27,M28
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M32
## 123
## - evaluate compactness with alpha = 2.97813324728707
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M32 M33 M34 M35
## 161 174 253 188
## - evaluate compactness with alpha = 2.97813324728707
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M36
## 676
## - evaluate compactness with alpha = 2.97813324728707
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M36 M37 M38
## 108 151 24
## - evaluate compactness with alpha = 2.97813324728707
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M39 M40 M41 M42
## 70 44 71 64
## - evaluate compactness with alpha = 2.97813324728707
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M43 M44
## 377 460
## - evaluate compactness with alpha = 2.97813324728707
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M45 M46 M47
## 405 385 394
## - evaluate compactness with alpha = 2.97813324728707
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M48 M49
## 181 59
## - evaluate compactness with alpha = 2.97813324728707
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M50 M51
## 384 133
## - evaluate compactness with alpha = 2.97813324728707
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M52 M53 M54 M55
## 41 87 83 45
## - evaluate compactness with alpha = 2.97813324728707
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## ##### Multiscale clustering iteration.4
## - current number of identified modules:34
## - number of parent modules to split:2
## - parent module names:M36,M50
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M56 M57 M58
## 46 34 28
## - evaluate compactness with alpha = 2.97813324728707
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
## # Commence coarse-grained structure detection...
## Step detected...
## - clusters:
## M59 M60 M61 M62
## 143 142 27 72
## - evaluate compactness with alpha = 2.97813324728707
## - evaluate intra-cluster connectivity
## -- Check random connectivity for controls...
## # Collect results
# probe cluster hierarchy to identify the main root cluster.
do.probe = TRUE
mo = "M0"
while (do.probe)
{
tbl = subset(iter.res$module.table,parent.cluster == mo & module.size >= min.cells)
if (nrow(tbl) > 1)
{
do.probe = FALSE
}else{
mo = tbl$cluster.name[1]
}
}
iter.res$root.cluster = mo
#saveRDS(iter.res,file = paste0(out.dir,"/MSC_Euclidean.RDS"))
iter.res.euc = iter.res;
rm(iter.res)
The cluster hierarchy has been integrated with the dotplot to show the marker expression architecture.
##### established markers: We
# get the markers
ct.markers = c("T-cell" = "CD3D","CD4" = "CD4","CD8" = "CD8A","CD8" = "NKG7",
"effector CD8" = "IFNG",
"Treg" = "FOXP3","Treg" = "IL2RA",
"B-cell" = "CD19","Monocyte" = "CD14",
"M1" = "HLA-DRA","M2" = "MSR1","MDSC" = "S100A9",
"Dendritic" = "NRP1",
"NK" = "NCAM1","Erythrocyte" = "HBA1","Platelet" = "PF4")
ct.markers.mapped = rownames(qc.out$sce)[match(ct.markers,gsub("\\|(.*)$","",rownames(qc.out$sce)))];
names(ct.markers.mapped) = names(ct.markers);
### get the plot
cnt = counts(qc.out$sce)
mat = assay(qc.out$sce,"ALRA.logcounts")
rownames(cnt) = rownames(mat)
htbl = iter.res.cor$module.table[,c("cluster.name","parent.cluster")];colnames(htbl) = c("module.id","module.parent")
modules = iter.res.cor$modules
hplot.out = plot_hierarchy_dot.raw(m = mat,cnts = cnt,
htbl,modules,celltype.markers = ct.markers.mapped,pct.mark = 0.1,
plot.hierarchy = TRUE,
filename = "pbmc_8k/hierarchy.celltype.png",width.per.plot = 500,height.per.plot = 3500)
## Calculate marker statistics...
## Plotting the hierarchy with dots...
## output plots to:pbmc_8k/hierarchy.celltype.png
plot_grid(plotlist = lapply(hplot.out$plotlist,function(x) x + theme(legend.position = "none")),ncol = 4)
### get embedding coordinates
xy = reducedDim(qc.out$sce,"tSNE.imputed")
xy = xy[colnames(qc.out$sce),]
### clustering results
# get the membership rooted to certain parent module
vec = get_rooted_membership(mtbl = iter.res.cor$module.table,
modules = iter.res.cor$modules,
pid = iter.res.cor$root.cluster,bg = colnames(qc.out$sce))
pobj.msc = plot_dimred_manual.raw(xy,
cell.feat = vec,
alpha.val = 0.3,cls.col = NULL,add.labels = TRUE)
# add other aesthetic features
pobj.msc = pobj.msc +
guides(colour = "none") +
theme_classic() +
theme(axis.line = element_blank(),axis.title = element_blank(),axis.text = element_blank(),axis.ticks = element_blank()) +
annotate(geom = "segment",x = quantile(xy[,1],0),xend = quantile(xy[,1],0.1),y = min(xy[,2]),yend = min(xy[,2]),colour = "black",arrow = grid::arrow(length = unit(0.015, "npc"))) +
annotate(geom = "segment",x = min(xy[,1]),xend = min(xy[,1]),y = min(xy[,2]),yend = quantile(xy[,2],0.1),colour = "black",arrow = grid::arrow(length = unit(0.015, "npc")) )
### get cell type tSNE plots
vec = colData(qc.out$sce)$inferred.cell.type.broad;names(vec) = colnames(qc.out$sce)
pobj.ct.broad = plot_dimred_manual.raw(xy,cell.feat =vec,alpha.val = 0.3,cls.col = NULL,add.labels = TRUE) + guides(colour = "none") +
theme_classic() +
theme(axis.line = element_blank(),axis.title = element_blank(),axis.text = element_blank(),axis.ticks = element_blank()) +
annotate(geom = "segment",x = quantile(xy[,1],0),xend = quantile(xy[,1],0.1),y = min(xy[,2]),yend = min(xy[,2]),colour = "black",arrow = grid::arrow(length = unit(0.015, "npc"))) +
annotate(geom = "segment",x = min(xy[,1]),xend = min(xy[,1]),y = min(xy[,2]),yend = quantile(xy[,2],0.1),colour = "black",arrow = grid::arrow(length = unit(0.015, "npc")) )
library(cowplot)
print(plot_grid(pobj.msc,pobj.ct.broad,ncol = 2))
The multi-scale clustering architecture in MSC can be effectively represented as a hierarchical dendrogram. Here, we make use of this hierarchy to visualize the cell cluster characteristic by pie charts at different hierarchical levels, allowing to inspect the relationship between the hierarchy and cell cluster features.
library(RColorBrewer)
# assign cell type colors
vec.ct = colData(qc.out$sce)$inferred.cell.type.broad;names(vec.ct) = colnames(qc.out$sce);
ct = unique(vec.ct)
colmap.ct = brewer.pal(n = length(ct),"Set3")
names(colmap.ct) = ct
# get cluster hierarchy
htbl = subset(iter.res.cor$module.table,module.size >= 30)[,c("cluster.name","parent.cluster")]
modules = iter.res.cor$modules
# make piechart
pie.ct = plot_hierarchy_pie(htbl,modules,vec.ct,colmap = colmap.ct,edge_colour = "grey")
pie.ct = pie.ct + guides(fill = guide_legend(label.theme = element_text(size = 15),title.theme = element_text(size = 17),title = "Cell Type"))
# extract legend
library(ggpubr)
leg = get_legend(pie.ct)
leg = as_ggplot(leg)
txt.dat = pie.ct$data[,c("x","y","name")]
txt.dat = subset(txt.dat,name %in% c(subset(iter.res.cor$module.table,parent.cluster == "M1")$cluster.name))
library(ggrepel)
pie.ct = pie.ct + geom_text_repel(data = txt.dat,aes(x = x,y = y,label = name),colour = "darkblue",segment.colour = "darkblue",vjust = 1.5,size = 6) +
guides(fill = "none") +
theme(panel.border = element_blank(),legend.position = "right",legend.direction = "vertical",
legend.text = element_text(size = 16),
plot.title = element_text(size = 20,hjust = 0.5),
plot.margin = margin(t = 0,r = 0,b = 0,l = 0))
pie.ct = plot_grid(pie.ct,leg,ncol = 1,rel_heights = c(0.8,0.3))
print(pie.ct)