# BLAZE q20
trans_B_q20 <- 'data/trans_count_B+F_q20_rebase.csv'
gene_B_q20 <- 'data/gene_count_B+F_q20_rebase.csv'
# BLAZE non q20 GridIon
trans_B_nonq20_gd <- 'data/trans_count_B+F_non_q20_rebase.csv'
gene_B_nonq20_gd <- 'data/gene_count_B+F_non_q20_rebase.csv'
# BLAZE non q20 PromithIon
trans_B_nonq20_pmth <- 'data/trans_count_B+F_promith.csv'
gene_B_nonq20_pmth <- 'data/gene_count_B+F_promith.csv'
# BLAZE Matt data
trans_B_matt <- 'data/trans_count_B+F_matt.csv'
gene_B_matt <- 'data/gene_count_B+F_matt.csv'

# Cellranger q20
trans_C_q20 <- 'data/trans_count_C+F_q20_rebase.csv'
gene_C_q20 <- 'data/gene_count_C+F_q20_rebase.csv'
# Cellranger non q20 GridIon
trans_C_nonq20_gd <- 'data/trans_count_C+F_non_q20_rebase.csv'
gene_C_nonq20_gd <- 'data/gene_count_C+F_non_q20_rebase.csv'
# Cellranger non q20 PromithIon
trans_C_nonq20_pmth <- 'data/trans_count_C+F_promith.csv'
gene_C_nonq20_pmth <- 'data/gene_count_C+F_promith.csv'
# Cellranger Matt data
trans_C_matt <- 'data/trans_count_C+F_matt.csv'
gene_C_matt <- 'data/gene_count_C+F_matt.csv'


# Sockeye q20
trans_S_q20 <- 'data/trans_count_S+F_q20_rebase.csv'
gene_S_q20 <- 'data/gene_count_S+F_q20_rebase.csv'
# Sockeye non q20 GridIon
trans_S_nonq20_gd <- 'data/trans_count_S+F_non_q20_rebase.csv'
gene_S_nonq20_gd <- 'data/gene_count_S+F_non_q20_rebase.csv'
# Sockeye non q20 PromithIon
trans_S_nonq20_pmth <- 'data/trans_count_S+F_promith.csv'
gene_S_nonq20_pmth <- 'data/gene_count_S+F_promith.csv'
# Sockeye Matt data
trans_S_matt <- 'data/trans_count_S+F_matt.csv'
gene_S_matt <- 'data/gene_count_S+F_matt.csv'


# BLAZE HS Promith 
trans_B_HS_Promith <- 'data/transcript_count_BLAZE_HS_Promith.csv'
gene_B_HS_Promith <- 'data/gene_count_BLAZE_HS_Promith.csv'

# BLAZE HS SCMIXOLOGY
trans_B_HS_Matt <- 'data/transcript_count_BLAZE_HS_Matt.csv'
gene_B_HS_Matt <- 'data/gene_count_BLAZE_HS_Matt.csv'
plot_umap_matt <- function(count.matrix, min.features = 200,max.feature = 999999999, npc = 5, cluster_res = 0.7, fig_name = ''){

  # init 
  rst_figures <- list()
  rst_table = data.frame()

    counts <- read.csv(count.matrix)
    seurat_object <- CreateSeuratObject(counts = counts, project = "singlecell", min.cells = 3,min.features=1)
    rst_table <- rbind(rst_table, data.frame("Cells"=dim(seurat_object)[2],
                                             "median feature per Cell"=median(seurat_object$nFeature_RNA), row.names = paste0('min features > 0'),check.names = FALSE))
    seurat_object <- CreateSeuratObject(counts = counts, project = "singlecell", min.cells = 3, min.features = min.features)

    
    #remove unwanted cells. below are default settings but you can modify these
    seurat_object <- subset(seurat_object, subset = nFeature_RNA > min.features & nFeature_RNA < max.feature) 
        rst_table <- rbind(rst_table, data.frame("Cells"=dim(seurat_object)[2],
                                             "median feature per Cell"=median(seurat_object$nFeature_RNA), row.names = paste0('min features > 0', min.features),check.names = FALSE))
    
    #now you have removed unwanted cells, it is time to normalize the data. By default, Seurat employs a global-scaling normalization method "LogNormalize" that normalizes the feature expression measurements for each cell by the total expression, multiplies this by a scale factor (10,000 by default), and log-transforms the result.
    seurat_object <- NormalizeData(seurat_object, normalization.method = "LogNormalize", scale.factor = 10000)
    #you can alternatively input seurat_object <- NormalizeData(seurat_object) instead of above.
    #we now identify highly variable features in order to determine a subset of features that exhibit high cell-to-cell 
    
    #variation in the dataset.
    seurat_object <- FindVariableFeatures(seurat_object, selection.method = "vst", nfeatures = 2000)
    
    #now we apply a linear transformation (scaling) that is a standard pre-processing step prior to dimensional reduction techniques like PCA
    all.genes <- rownames(seurat_object)
    seurat_object <- ScaleData(seurat_object, features = all.genes)
    #we can visualise both cells and features that define the PCA
    seurat_object <- RunPCA(seurat_object, features = VariableFeatures(object = seurat_object))
    #this can be visualised using an Elbow Plot
    #rst_figures <- append(rst_figures, ElbowPlot(seurat_object))
    
    #to cluster cells, I have used the number of sig. PCs that I observed in the above plots. The findneighbors function is for constuction of a KNN graph based on euclidean distance in PCA space and refines edge weights between any two cells based on the shared overlap in their local neighborhoods (jaccard similarity). It uses the input of previously defined dimensionality of the dataset.
    seurat_object <- FindNeighbors(seurat_object, dims = 1:npc)
    #now to actually cluster the cells, we apply modularity optimisation techniques (default is Louvain algorithm). The findclusters function contains a resolution parameter which sets the granularity of downstream clustering. Settings are recommended between 0.4-1.2, but may need to be increased for larger datasets.
    seurat_object <- FindClusters(seurat_object, resolution = cluster_res)
    
    #to View metadata
    #seurat_object@meta.data
    #run non-linear dimensional reduction (UMAP/tSNE)
    seurat_object <- RunUMAP(seurat_object, dims = 1:npc)
    
    rst_figures <- append(rst_figures, list(DimPlot(seurat_object, reduction = "umap") + labs(color = "cluster \n(from PCA)", title = '') + theme(text = element_text(size = 10))  )) 
    
    
    # add meta data: cluster found by matt 'data/Matt_data_cell_barcodes_and_cluster.csv'
    df_matt = read.csv('data/cluster_annotation.csv')
    group_from_matt = unlist(lapply(colnames(seurat_object), function(x){ifelse(x %in% df_matt$barcode_seq,df_matt$groups[df_matt$barcode_seq == x], NA)}))
    seurat_object <- AddMetaData(seurat_object,  metadata = group_from_matt, col.name = 'group_from_matt')
    
    rst_figures <- append(rst_figures, list(
      DimPlot(seurat_object, reduction = "umap", group.by = 'group_from_matt')+labs(color = str_wrap("Known cell type from Matt's paper",15),title = '')+ theme(text = element_text(size = 10)),
      FeaturePlot(seurat_object, reduction = "umap", features = 'nCount_RNA')+labs(color = "UMI count",title = '')+ theme(text = element_text(size = 10)),
    FeaturePlot(seurat_object, reduction = "umap", features = 'nFeature_RNA')+labs(color = str_wrap("Feature count (isoform/gene)",15),title = '')+ theme(text = element_text(size = 10))
    ))
    
    
    plot_pc <- ElbowPlot(seurat_object)+labs(title = 'SD explained by each PC') + theme(text = element_text(size = 10))
    plot_umap <- grid.arrange(plot_pc, tableGrob(rst_table), rst_figures[[1]], rst_figures[[2]], rst_figures[[3]], rst_figures[[4]], ncol=2, top=textGrob(fig_name))
    list(plot_umap)
}

1 Matt data

1.1 UMAPs on transcript count

1.1.1 Cellranger + FLAME

# Cellranger
plots <- plot_umap_matt(trans_C_matt, min.features = 200,max.feature = 999999999, npc = 10, cluster_res = 0.7,fig_name = 'Cellranger+FLAME (Transcript counts, Matt\'s data)')
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 245
## Number of edges: 5202
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8548
## Number of communities: 6
## Elapsed time: 0 seconds

1.1.2 Sockeye + FLAME

# Sockeye
plots <- plot_umap_matt(trans_S_matt, min.features = 200,max.feature = 999999999, npc = 10, cluster_res = 0.7,fig_name = 'Sockeye+FLAME (Transcript counts, Matt\'s data)')
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 412
## Number of edges: 8896
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8663
## Number of communities: 11
## Elapsed time: 0 seconds

1.1.3 BLAZE + FLAME

plots <- plot_umap_matt(trans_B_matt, min.features = 200,max.feature = 999999999, npc = 10, cluster_res = 0.7,fig_name = 'BLAZE+FLAME (Transcript counts, Matt\'s data)')
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 187
## Number of edges: 3542
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8330
## Number of communities: 5
## Elapsed time: 0 seconds

1.1.4 BLAZE HS + FLAME

plots <- plot_umap_matt(trans_B_HS_Matt, min.features = 200,max.feature = 999999999, npc = 10, cluster_res = 0.7,fig_name = 'BLAZE_HS+FLAME (Transcript counts, Matt\'s data)')
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 256
## Number of edges: 5581
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8500
## Number of communities: 6
## Elapsed time: 0 seconds

1.2 UMAPs on Gene count

1.2.1 Cellranger + FLAME

# Cellranger
plots <- plot_umap_matt(gene_C_matt, min.features = 200,max.feature = 999999999, npc = 10, cluster_res = 0.7, fig_name = 'Cellranger+FLAME (Gene counts, Matt\'s data)')
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 244
## Number of edges: 5044
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8600
## Number of communities: 6
## Elapsed time: 0 seconds

1.2.2 Sockeye + FLAME

# Sockeye
plots <- plot_umap_matt(gene_S_matt, min.features = 200,max.feature = 999999999, npc = 10, cluster_res = 0.7,fig_name = 'Sockeye+FLAME (Gene counts, Matt\'s data)')
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 408
## Number of edges: 8519
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8806
## Number of communities: 11
## Elapsed time: 0 seconds

1.2.3 BLAZE + FLAME

plots <- plot_umap_matt(gene_B_matt, min.features = 200,max.feature = 999999999, npc = 10, cluster_res = 0.7,fig_name = 'BLAZE+FLAME (Gene counts, Matt\'s data)')
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 187
## Number of edges: 3612
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8271
## Number of communities: 5
## Elapsed time: 0 seconds

1.2.4 BLAZE HS + FLAME

plots <- plot_umap_matt(gene_B_HS_Matt, min.features = 200,max.feature = 999999999, npc = 10, cluster_res = 0.7,fig_name = 'BLAZE_HS+FLAME (Gene counts, Matt\'s data)')
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 253
## Number of edges: 5463
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8605
## Number of communities: 6
## Elapsed time: 0 seconds

1.2.5 Sockeye pipeline

UAMP from Sockeye pipeline

plot_umap <- function(count.matrix, min.features = 200,max.feature = 999999999, npc = 5, cluster_res = 0.7, fig_name = ''){

  # init 
  rst_figures <- list()
  rst_table = data.frame()

    counts <- read.csv(count.matrix)
    
    seurat_object <- CreateSeuratObject(counts = counts, project = "singlecell", min.cells = 3, min.features = 0.5)
    rst_table <- rbind(rst_table, data.frame("Cells"=dim(seurat_object)[2],
                                             "median feature per Cell"=median(seurat_object$nFeature_RNA), row.names = paste0('min features > 0'),check.names = FALSE))
    
    #remove unwanted cells. below are default settings but you can modify these
    seurat_object <- subset(seurat_object, subset = nFeature_RNA > min.features & nFeature_RNA < max.feature) 
        rst_table <- rbind(rst_table, data.frame("Cells"=dim(seurat_object)[2],
                                             "median feature per Cell"=median(seurat_object$nFeature_RNA), row.names = paste0('min features > 0', min.features),check.names = FALSE))
    
    #now you have removed unwanted cells, it is time to normalize the data. By default, Seurat employs a global-scaling normalization method "LogNormalize" that normalizes the feature expression measurements for each cell by the total expression, multiplies this by a scale factor (10,000 by default), and log-transforms the result.
    seurat_object <- NormalizeData(seurat_object, normalization.method = "LogNormalize", scale.factor = 10000)
    #you can alternatively input seurat_object <- NormalizeData(seurat_object) instead of above.
    #we now identify highly variable features in order to determine a subset of features that exhibit high cell-to-cell 
    
    #variation in the dataset.
    seurat_object <- FindVariableFeatures(seurat_object, selection.method = "vst", nfeatures = 2000)
    
    #now we apply a linear transformation (scaling) that is a standard pre-processing step prior to dimensional reduction techniques like PCA
    all.genes <- rownames(seurat_object)
    seurat_object <- ScaleData(seurat_object, features = all.genes)
    #we can visualise both cells and features that define the PCA
    seurat_object <- RunPCA(seurat_object, features = VariableFeatures(object = seurat_object))
    #this can be visualised using an Elbow Plot
    #rst_figures <- append(rst_figures, ElbowPlot(seurat_object))
    
    #to cluster cells, I have used the number of sig. PCs that I observed in the above plots. The findneighbors function is for constuction of a KNN graph based on euclidean distance in PCA space and refines edge weights between any two cells based on the shared overlap in their local neighborhoods (jaccard similarity). It uses the input of previously defined dimensionality of the dataset.
    seurat_object <- FindNeighbors(seurat_object, dims = 1:npc)
    #now to actually cluster the cells, we apply modularity optimisation techniques (default is Louvain algorithm). The findclusters function contains a resolution parameter which sets the granularity of downstream clustering. Settings are recommended between 0.4-1.2, but may need to be increased for larger datasets.
    seurat_object <- FindClusters(seurat_object, resolution = cluster_res)
    
    #to View metadata
    #seurat_object@meta.data
    #run non-linear dimensional reduction (UMAP/tSNE)
    seurat_object <- RunUMAP(seurat_object, dims = 1:npc)
    
    rst_figures <- append(rst_figures, list(DimPlot(seurat_object, reduction = "umap") + labs(color = "cluster \n(from PCA)", title = '') + theme(text = element_text(size = 10))  )) 
    
    
    rst_figures <- append(rst_figures, list(
      FeaturePlot(seurat_object, reduction = "umap", features = 'nCount_RNA')+labs(color = "UMI count",title = '')+ theme(text = element_text(size = 10)),
    FeaturePlot(seurat_object, reduction = "umap", features = 'nFeature_RNA')+labs(color = str_wrap("Feature count (isoform/gene)",15),title = '')+ theme(text = element_text(size = 10))
    ))
    
    
    plot_pc <- ElbowPlot(seurat_object)+labs(title = 'SD explained by each PC') + theme(text = element_text(size = 10))
    plot_umap <- grid.arrange(plot_pc, tableGrob(rst_table), rst_figures[[1]], rst_figures[[2]], rst_figures[[3]], ncol=2, top=textGrob(fig_name))
    list(plot_umap, seurat_object)
}

2 Sefi’s Q20 data

2.1 UMAPs on transcript count

2.1.1 Cellranger + FLAME

# Cellranger
plots <- plot_umap(trans_C_q20, min.features = 200,max.feature = 999999999, npc = 5, cluster_res = 0.7,fig_name = 'Cellranger+FLAME (Transcript counts, Q20 data)')
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 618
## Number of edges: 19499
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6830
## Number of communities: 5
## Elapsed time: 0 seconds

umap_object_C <- plots[[2]]

2.1.2 Sockeye + FLAME

# Sockeye
plots <- plot_umap(trans_S_q20, min.features = 200,max.feature = 999999999, npc = 5, cluster_res = 0.7,fig_name = 'Sockeye+FLAME (Transcript counts, Q20 data)')
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 626
## Number of edges: 19675
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6933
## Number of communities: 5
## Elapsed time: 0 seconds

umap_object_S <- plots[[2]]

2.1.3 BLAZE + FLAME

plots <- plot_umap(trans_B_q20, min.features = 200,max.feature = 999999999, npc = 5, cluster_res = 0.7,fig_name = 'BLAZE+FLAME (Transcript counts, Q20 data)')
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 620
## Number of edges: 19092
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6839
## Number of communities: 6
## Elapsed time: 0 seconds

umap_object_B <- plots[[2]]

2.1.4 Compare the clusters between BLAZE, Sockeye and Cellranger

With the UMAP plots above, I colored cells in BLAZE or Sockeye plot based on the clusters identified in Cellranger result.

# cell Ranger    

cluster_map_C = umap_object_C$seurat_clusters
C_cluster_in_B = unlist(lapply(colnames(umap_object_B), function(x){cluster_map_C[x]}))
C_cluster_in_S = unlist(lapply(colnames(umap_object_S), function(x){cluster_map_C[x]}))

umap_object_B <- AddMetaData(umap_object_B,  metadata = C_cluster_in_B, col.name = 'c_cluster')
umap_object_S <- AddMetaData(umap_object_S,  metadata = C_cluster_in_S, col.name = 'c_cluster')

grid.arrange( DimPlot(umap_object_C, reduction = "umap") + labs(color = "CellRanger cluster", title = 'CellRanger') + theme(text = element_text(size = 10)),
              DimPlot(umap_object_B, reduction = "umap", group.by = 'c_cluster') + labs(color = "CellRanger cluster", title = 'BLAZE') + theme(text = element_text(size = 10)),
              DimPlot(umap_object_S, reduction = "umap", group.by = 'c_cluster') + labs(color = "CellRanger cluster", title = 'Sockeye') + theme(text = element_text(size = 10)),
              top=textGrob('Compare cells among UMAP'), nrow=1)

2.2 UMAPs on Gene count

2.2.1 Cellranger + FLAME

# Cellranger
plots <- plot_umap(gene_C_q20, min.features = 200,max.feature = 999999999, npc = 5, cluster_res = 0.7,fig_name = 'Cellranger+FLAME (Gene counts, Q20 data)')
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 583
## Number of edges: 18015
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6699
## Number of communities: 5
## Elapsed time: 0 seconds

umap_object_C <- plots[[2]]

2.2.2 Sockeye + FLAME

# Sockeye
plots <- plot_umap(gene_S_q20, min.features = 200,max.feature = 999999999, npc = 5, cluster_res = 0.7,fig_name = 'Sockeye+FLAME (Gene counts, Q20 data)')
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 583
## Number of edges: 18094
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6773
## Number of communities: 6
## Elapsed time: 0 seconds

umap_object_S <- plots[[2]]

2.2.3 BLAZE + FLAME

plots <- plot_umap(gene_B_q20, min.features = 200,max.feature = 999999999, npc = 5, cluster_res = 0.7,fig_name = 'BLAZE+FLAME (Gene counts, Q20 data)')
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 589
## Number of edges: 18369
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6822
## Number of communities: 5
## Elapsed time: 0 seconds

umap_object_B <- plots[[2]]

2.2.4 Compare the clusters between BLAZE, Sockeye and Cellranger

With the UMAP plots above, I colored cells in BLAZE or Sockeye plot based on the clusters identified in Cellranger result.

# cell Ranger    

cluster_map_C = umap_object_C$seurat_clusters
C_cluster_in_B = unlist(lapply(colnames(umap_object_B), function(x){cluster_map_C[x]}))
C_cluster_in_S = unlist(lapply(colnames(umap_object_S), function(x){cluster_map_C[x]}))

umap_object_B <- AddMetaData(umap_object_B,  metadata = C_cluster_in_B, col.name = 'c_cluster')
umap_object_S <- AddMetaData(umap_object_S,  metadata = C_cluster_in_S, col.name = 'c_cluster')

grid.arrange( DimPlot(umap_object_C, reduction = "umap") + labs(color = "CellRanger cluster", title = 'CellRanger') + theme(text = element_text(size = 10)),
              DimPlot(umap_object_B, reduction = "umap", group.by = 'c_cluster') + labs(color = "CellRanger cluster", title = 'BLAZE') + theme(text = element_text(size = 10)),
              DimPlot(umap_object_S, reduction = "umap", group.by = 'c_cluster') + labs(color = "CellRanger cluster", title = 'Sockeye') + theme(text = element_text(size = 10)),
              top=textGrob('Compare cells among UMAP'), nrow=1)

2.2.5 Sockeye pipeline

UAMP from Sockeye pipeline

3 Sefi’s non-Q20 GridIon data

3.1 UMAPs on transcript count

3.1.1 Cellranger + FLAME

# Cellranger
plots <- plot_umap(trans_C_nonq20_gd, min.features = 200,max.feature = 999999999, npc = 5, cluster_res = 0.7,fig_name = 'Cellranger+FLAME (Transcript counts, non-Q20 GridIon data)')
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 788
## Number of edges: 23638
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7510
## Number of communities: 6
## Elapsed time: 0 seconds

umap_object_C <- plots[[2]]

3.1.2 Sockeye + FLAME

# Sockeye
plots <- plot_umap(trans_S_nonq20_gd, min.features = 200,max.feature = 999999999, npc = 5, cluster_res = 0.7,fig_name = 'Sockeye+FLAME (Transcript counts, non-Q20 GridIon data)')
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 795
## Number of edges: 24157
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7606
## Number of communities: 7
## Elapsed time: 0 seconds

umap_object_S <- plots[[2]]

3.1.3 BLAZE + FLAME

plots <- plot_umap(trans_B_nonq20_gd, min.features = 200,max.feature = 999999999, npc = 5, cluster_res = 0.7,fig_name = 'BLAZE+FLAME (Transcript counts, non-Q20 GridIon data)')
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 788
## Number of edges: 23676
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7565
## Number of communities: 6
## Elapsed time: 0 seconds

umap_object_B <- plots[[2]]

3.1.4 Compare the clusters between BLAZE, Sockeye and Cellranger

With the UMAP plots above, I colored cells in BLAZE or Sockeye plot based on the clusters identified in Cellranger result.

# cell Ranger    

cluster_map_C = umap_object_C$seurat_clusters
C_cluster_in_B = unlist(lapply(colnames(umap_object_B), function(x){cluster_map_C[x]}))
C_cluster_in_S = unlist(lapply(colnames(umap_object_S), function(x){cluster_map_C[x]}))

umap_object_B <- AddMetaData(umap_object_B,  metadata = C_cluster_in_B, col.name = 'c_cluster')
umap_object_S <- AddMetaData(umap_object_S,  metadata = C_cluster_in_S, col.name = 'c_cluster')

grid.arrange( DimPlot(umap_object_C, reduction = "umap") + labs(color = "CellRanger cluster", title = 'CellRanger') + theme(text = element_text(size = 10)),
              DimPlot(umap_object_B, reduction = "umap", group.by = 'c_cluster') + labs(color = "CellRanger cluster", title = 'BLAZE') + theme(text = element_text(size = 10)),
              DimPlot(umap_object_S, reduction = "umap", group.by = 'c_cluster') + labs(color = "CellRanger cluster", title = 'Sockeye') + theme(text = element_text(size = 10)),
              top=textGrob('Compare cells among UMAP'), nrow=1)

3.2 UMAPs on Gene count

3.2.1 Cellranger + FLAME

# Cellranger
plots <- plot_umap(gene_C_nonq20_gd, min.features = 200,max.feature = 999999999, npc = 5, cluster_res = 0.7,fig_name = 'Cellranger+FLAME (Gene counts, non-Q20 GridIon data)')
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 788
## Number of edges: 23241
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7665
## Number of communities: 6
## Elapsed time: 0 seconds

umap_object_C <- plots[[2]]

3.2.2 Sockeye + FLAME

# Sockeye
plots <- plot_umap(gene_S_nonq20_gd, min.features = 200,max.feature = 999999999, npc = 5, cluster_res = 0.7,fig_name = 'Sockeye+FLAME (Gene counts, non-Q20 GridIon data)')
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 794
## Number of edges: 23930
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7722
## Number of communities: 6
## Elapsed time: 0 seconds

umap_object_S <- plots[[2]]

3.2.3 BLAZE + FLAME

plots <- plot_umap(gene_B_nonq20_gd, min.features = 200,max.feature = 999999999, npc = 5, cluster_res = 0.7,fig_name = 'BLAZE+FLAME (Gene counts, non-Q20 GridIon data)')
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 786
## Number of edges: 23203
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7651
## Number of communities: 6
## Elapsed time: 0 seconds

umap_object_B <- plots[[2]]

3.2.4 Compare the clusters among BLAZE, Sockeye and Cellranger

With the UMAP plots above, I colored cells in BLAZE or Sockeye plot based on the clusters identified in Cellranger result.

# cell Ranger    

cluster_map_C = umap_object_C$seurat_clusters
C_cluster_in_B = unlist(lapply(colnames(umap_object_B), function(x){cluster_map_C[x]}))
C_cluster_in_S = unlist(lapply(colnames(umap_object_S), function(x){cluster_map_C[x]}))

umap_object_B <- AddMetaData(umap_object_B,  metadata = C_cluster_in_B, col.name = 'c_cluster')
umap_object_S <- AddMetaData(umap_object_S,  metadata = C_cluster_in_S, col.name = 'c_cluster')

grid.arrange( DimPlot(umap_object_C, reduction = "umap") + labs(color = "CellRanger cluster", title = 'CellRanger') + theme(text = element_text(size = 10)),
              DimPlot(umap_object_B, reduction = "umap", group.by = 'c_cluster') + labs(color = "CellRanger cluster", title = 'BLAZE') + theme(text = element_text(size = 10)),
              DimPlot(umap_object_S, reduction = "umap", group.by = 'c_cluster') + labs(color = "CellRanger cluster", title = 'Sockeye') + theme(text = element_text(size = 10)),
              top=textGrob('Compare cells among UMAP'), nrow=1)

3.2.5 Sockeye pipeline

UAMP from Sockeye pipeline

4 Sefi’s non-Q20 PromethIon data

4.1 UMAPs on transcript count

4.1.1 Cellranger + FLAME

# Cellranger
plots <- plot_umap(trans_C_nonq20_pmth, min.features = 200,max.feature = 999999999, npc = 5, cluster_res = 0.7,fig_name = 'Cellranger+FLAME (Transcript counts, non-Q20 PromethIon data)')
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 898
## Number of edges: 24680
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8284
## Number of communities: 8
## Elapsed time: 0 seconds

umap_object_C <- plots[[2]]

4.1.2 Sockeye + FLAME

# Sockeye
plots <- plot_umap(trans_S_nonq20_pmth, min.features = 200,max.feature = 999999999, npc = 5, cluster_res = 0.7,fig_name = 'Sockeye+FLAME (Transcript counts, non-Q20 PromethIon data)')
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1379
## Number of edges: 39464
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8201
## Number of communities: 10
## Elapsed time: 0 seconds

umap_object_S <- plots[[2]]

4.1.3 BLAZE + FLAME

plots <- plot_umap(trans_B_nonq20_pmth, min.features = 200,max.feature = 999999999, npc = 5, cluster_res = 0.7,fig_name = 'BLAZE+FLAME (Transcript counts, non-Q20 PromethIon data)')
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 803
## Number of edges: 21735
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7958
## Number of communities: 7
## Elapsed time: 0 seconds

umap_object_B <- plots[[2]]

4.1.4 BLAZE HS + FLAME

plots <- plot_umap(trans_B_HS_Promith, min.features = 200,max.feature = 999999999, npc = 5, cluster_res = 0.7,fig_name = 'BLAZE+FLAME (Transcript counts, non-Q20 PromethIon data)')
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 985
## Number of edges: 27377
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8350
## Number of communities: 9
## Elapsed time: 0 seconds

umap_object_B_HS <- plots[[2]]

4.1.5 Compare the clusters among BLAZE, Sockeye and Cellranger

With the UMAP plots above, I colored cells in BLAZE or Sockeye plot based on the clusters identified in Cellranger result.

# cell Ranger    

cluster_map_C = umap_object_C$seurat_clusters
C_cluster_in_B = unlist(lapply(colnames(umap_object_B), function(x){cluster_map_C[x]}))
C_cluster_in_S = unlist(lapply(colnames(umap_object_S), function(x){cluster_map_C[x]}))
C_cluster_in_B_HS = unlist(lapply(colnames(umap_object_B_HS), function(x){cluster_map_C[x]}))

umap_object_B <- AddMetaData(umap_object_B,  metadata = C_cluster_in_B, col.name = 'c_cluster')
umap_object_S <- AddMetaData(umap_object_S,  metadata = C_cluster_in_S, col.name = 'c_cluster')
umap_object_B_HS <- AddMetaData(umap_object_B_HS,  metadata = C_cluster_in_B_HS, col.name = 'c_cluster')


grid.arrange( DimPlot(umap_object_C, reduction = "umap") + labs(color = "CellRanger cluster", title = 'CellRanger') + theme(text = element_text(size = 10)),
              DimPlot(umap_object_B, reduction = "umap", group.by = 'c_cluster') + labs(color = "CellRanger cluster", title = 'BLAZE') + theme(text = element_text(size = 10)),
              DimPlot(umap_object_S, reduction = "umap", group.by = 'c_cluster') + labs(color = "CellRanger cluster", title = 'Sockeye') + theme(text = element_text(size = 10)),
              top=textGrob('Compare cells among UMAP'),
              DimPlot(umap_object_B_HS, reduction = "umap", group.by = 'c_cluster') + labs(color = "CellRanger cluster", title = 'BLAZE_HS') + theme(text = element_text(size = 10)),nrow=1)

4.1.6 Compare the clusters among BLAZE, Sockeye and Cellranger

With the UMAP plots above, I colored cells in bnased on Blaze clusters as Mike has suggested

# cell Ranger    
cluster_map_B = umap_object_B$seurat_clusters
C_cluster_in_C = unlist(lapply(colnames(umap_object_C), function(x){cluster_map_B[x]}))
C_cluster_in_S = unlist(lapply(colnames(umap_object_S), function(x){cluster_map_B[x]}))

umap_object_C <- AddMetaData(umap_object_C,  metadata = C_cluster_in_C, col.name = 'B_cluster')
umap_object_S <- AddMetaData(umap_object_S,  metadata = C_cluster_in_S, col.name = 'B_cluster')

grid.arrange(DimPlot(umap_object_C, reduction = "umap", group.by = 'B_cluster') + labs(color = "BLAZE cluster", title = 'Cell Ranger') + theme(text = element_text(size = 10)), 
  DimPlot(umap_object_B, reduction = "umap") + labs(color = "BLAZE cluster", title = 'BLAZE') + theme(text = element_text(size = 10)),
              DimPlot(umap_object_S, reduction = "umap", group.by = 'B_cluster') + labs(color = "BLAZE cluster", title = 'Sockeye') + theme(text = element_text(size = 10)),
              top=textGrob('Compare cells among UMAP'), nrow=1)

4.2 UMAPs on Gene count

4.2.1 Cellranger + FLAME

# Cellranger
plots <- plot_umap(gene_C_nonq20_pmth, min.features = 200,max.feature = 999999999, npc = 5, cluster_res = 0.7,fig_name = 'Cellranger+FLAME (Gene counts, non-Q20 PromethIon data)')
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 895
## Number of edges: 24558
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8027
## Number of communities: 8
## Elapsed time: 0 seconds

umap_object_C <- plots[[2]]

4.2.2 Sockeye + FLAME

# Sockeye
plots <- plot_umap(gene_S_nonq20_pmth, min.features = 200,max.feature = 999999999, npc = 5, cluster_res = 0.7,fig_name = 'Sockeye+FLAME (Gene counts, non-Q20 PromethIon data)')
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1372
## Number of edges: 39479
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8203
## Number of communities: 10
## Elapsed time: 0 seconds

umap_object_S <- plots[[2]]

4.2.3 BLAZE + FLAME

plots <- plot_umap(gene_B_nonq20_pmth, min.features = 200,max.feature = 999999999, npc = 5, cluster_res = 0.7,fig_name = 'BLAZE+FLAME (Gene counts, non-Q20 PromethIon data)')
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 803
## Number of edges: 21861
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7817
## Number of communities: 6
## Elapsed time: 0 seconds

umap_object_B <- plots[[2]]

4.2.4 BLAZE HS + FLAME

plots <- plot_umap(gene_B_HS_Promith, min.features = 200,max.feature = 999999999, npc = 5, cluster_res = 0.7,fig_name = 'BLAZE+FLAME (Gene counts, non-Q20 PromethIon data)')
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 981
## Number of edges: 27001
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8205
## Number of communities: 7
## Elapsed time: 0 seconds

umap_object_B_HS <- plots[[2]]

4.2.5 Compare the clusters among BLAZE, Sockeye and Cellranger

With the UMAP plots above, I colored cells in BLAZE or Sockeye plot based on the clusters identified in Cellranger result.

# cell Ranger    

cluster_map_C = umap_object_C$seurat_clusters
C_cluster_in_B = unlist(lapply(colnames(umap_object_B), function(x){cluster_map_C[x]}))
C_cluster_in_S = unlist(lapply(colnames(umap_object_S), function(x){cluster_map_C[x]}))
C_cluster_in_B_HS = unlist(lapply(colnames(umap_object_B_HS), function(x){cluster_map_C[x]}))

umap_object_B <- AddMetaData(umap_object_B,  metadata = C_cluster_in_B, col.name = 'c_cluster')
umap_object_S <- AddMetaData(umap_object_S,  metadata = C_cluster_in_S, col.name = 'c_cluster')

grid.arrange(DimPlot(umap_object_C, reduction = "umap") + labs(color = "CellRanger cluster", title = 'CellRanger') + theme(text = element_text(size = 10)),
             DimPlot(umap_object_B, reduction = "umap", group.by = 'c_cluster') + labs(color = "CellRanger cluster", title = 'BLAZE') + theme(text = element_text(size = 10)), 
             DimPlot(umap_object_S, reduction = "umap", group.by = 'c_cluster') + labs(color = "CellRanger cluster", title = 'Sockeye') + theme(text = element_text(size = 10)),
             top=textGrob('Compare cells among UMAP'), nrow=1)

4.2.6 Compare the clusters among BLAZE, Sockeye and Cellranger coloured by BLAZE

With the UMAP plots above, I colored cells in BLAZE or Sockeye plot based on the clusters identified in Cellranger result.

# cell Ranger    

cluster_map_B = umap_object_B$seurat_clusters
C_cluster_in_C = unlist(lapply(colnames(umap_object_C), function(x){cluster_map_B[x]}))
C_cluster_in_S = unlist(lapply(colnames(umap_object_S), function(x){cluster_map_B[x]}))

umap_object_C <- AddMetaData(umap_object_C,  metadata = C_cluster_in_C, col.name = 'B_cluster')
umap_object_S <- AddMetaData(umap_object_S,  metadata = C_cluster_in_S, col.name = 'B_cluster')
umap_object_B_HS <- AddMetaData(umap_object_B_HS,  metadata = C_cluster_in_B_HS, col.name = 'c_cluster')

grid.arrange(DimPlot(umap_object_C, reduction = "umap", group.by = 'B_cluster') + labs(color = "CellRanger cluster", title = 'Cell Ranger') + theme(text = element_text(size = 10)),
             DimPlot(umap_object_B, reduction = "umap") + labs(color = "BLAZE cluster", title = 'Blaze') + theme(text = element_text(size = 10)),
             DimPlot(umap_object_S, reduction = "umap", group.by = 'B_cluster') + labs(color = "CellRanger cluster", title = 'Sockeye') + theme(text = element_text(size = 10)),
             top=textGrob('Compare cells among UMAP'), nrow=1)

DimPlot(umap_object_B_HS, reduction = "umap", group.by = 'c_cluster') + labs(color = "CellRanger cluster", title = 'BLAZE_HS') + theme(text = element_text(size = 10))

4.2.7 Sockeye pipeline

UAMP from Sockeye pipeline