---
title: "OAC MYO CAF CANCER CELL SUBMISSION"
author: "R.Walker"
date: "04/07/2019" 
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, warning=FALSE, message=FALSE)
knitr::opts_chunk$set(fig.width=12, fig.height=8, fig.align = "center", fig.path='Figs/',
                      echo=TRUE, warning=FALSE, message=FALSE)
knitr::opts_knit$set(root.dir = "~/Desktop/Paper Seurat 3/")
library(Seurat)
library(sctransform)
library(Matrix)
library(ggrepel)
library(ggdendro)
library(viridis)
library(ape)
library(tidyverse)
library(cowplot)
library(Polychrome)
library(corrplot)
library("graphics")
library("gplots")
library(scales)
```

Read in the DGE and create a Seurat object from the raw data keeping genes expressed in at least 5 cells and with a minimum of 200 genes per cell.
```{r}
load("OAC.DGE.RData")

load("Cell.Metrics.RData")
OAC <- CreateSeuratObject(counts = OAC.DGE, min.cells = 5, min.features = 200, meta.data = Metrics, project = "OAC")
table(OAC@active.ident)
```
OAC1711T has very few cells and so was removed here along with the matched normal
```{r}
OAC <- SubsetData(OAC, ident.remove = c("OAC1711T", "OAC1711N"))
```

```{r}
OAC
```
Add in a dissociation genelist
```{r}
dissociation <- c("ACTG1","ANKRD1","ARID5A","ATF3","ATF4","BAG3","BHLHE40","BRD2","BTG1","BTG2","CCNL1","CCRN4L","CEBPB","CEBPD","CEBPG","CSRNP1","CXCL1","CYR61","DCN","DDX3X","DDX5","DES","DNAJA1","DNAJB1","DNAJB4","DUSP1","DUSP8","EGR1","EGR2","EIF1","EIF5","ERF","ERRFI1","FAM132B","FOS","FOSB","FOSL2","GADD45A","GCC1","GEM","H3F3B","HIPK3","HSP90AA1","HSP90AB1","HSPA1A","HSPA1B","HSPA5","HSPA8","HSPB1","HSPH1","ID3","IDI1","IER2","IER3","IFRD1","IL6","IRF1","IRF8","ITPKC","JUN","JUNB","JUND","KLF2","KLF4","KLF6","KLF9","LITAF","LMNA","MAFF","MAFK","MCL1","MIDN","MT1G","MT1B","MT1E","MT1F","MT1H","MT1A","MT1M","MT1X","MT1HL1","MT2","MYADM","MYC","MYD88","NCKAP5L","NCOA7","NFKBIA","NFKBIZ","NOP58","NPPC","NR4A1","ODC1","OSGIN1","OXNAD1","PCF11","PDE4B","PER1","PHLDA1","PNP","PNRC1","PPP1CC","PPP1R15A","PXDC1","RAP1B","RASSF1","RHOB","RHOH","RIPK1","SAT1","SBNO2","SDC4","SERPINE1","SKIL","SLC10A6","SLC38A2","SLC41A1","SOCS3","SQSTM1","SRF","SRSF5","SRSF7","STAT3","TAGLN2","TIPARP","TNFAIP3","TNFAIP6","TPM3","TPPP3","TRA2A","TRA2B","TRIB1","TUBB4B","TUBB6","UBC","USP2","WAC","ZC3H12A","ZFAND5","ZFP36","ZFP36L1","ZFP36L2","ZYX","GADD45G","HSPE1","IER5","KCNE4")
```

Find dissociaiton genes in the genes kept in the Seurat object. The Add meta data of a percentage dissociation score and a dissociation module score.
```{r}
dissociation.genes.OAC <- dissociation[dissociation %in% rownames(x= OAC)]
OAC[["percent.dissociation"]] <- PercentageFeatureSet(OAC, features = dissociation.genes.OAC)
```

Calculate what percentage of the cells will be annotated as dissociation-affected cells with cutoff value 10
```{r}
DissociationCutoff <- 10
OAC@meta.data$Dissociation_affected <- NULL
OAC@meta.data$Dissociation_affected <- ifelse(OAC@meta.data$percent.dissociation >= DissociationCutoff,1,0)
PercentageDissociationAffected <- round((nrow(OAC@meta.data[OAC@meta.data$percent.dissociation >= DissociationCutoff, ]))/nrow(OAC@meta.data), digits = 2)
print(c("Percentage of OAC cells annotated as dissociation-affected cells is", PercentageDissociationAffected))
```


Plot a histogram 
```{r}
p1 <- ggplot(OAC@meta.data) + geom_histogram(aes(percent.dissociation, fill = ..count..), bins = 1000) + scale_fill_viridis_c("Count", option = "plasma") + xlab("Percentage Dissociation Gene Content") + ylab("Cell Count")+ xlim(0,30) + ylim(0,150)
p1
```
Now load a list of housekeeping genes.
```{r}
housekeepers <- c("ACTB","B2M","HNRPLL","HPRT","PSMB2","PSMB4","PPIA","PRPS1","PRPS1L1","PRPS1L3","PRPS2","PRPSAP1","PRPSAP2","RPL10","RPL10A","RPL10L","RPL11","RPL12","RPL13","RPL14","RPL15","RPL17","RPL18","RPL19","RPL21","RPL22","RPL22L1","RPL23","RPL24","RPL26","RPL27","RPL28","RPL29","RPL3","RPL30","RPL32","RPL34","RPL35","RPL36","RPL37","RPL38","RPL39","RPL39L","RPL3L","RPL4","RPL41","RPL5","RPL6","RPL7","RPL7A","RPL7L1","RPL8","RPL9","RPLP0","RPLP1","RPLP2","RPS10","RPS11","RPS12","RPS13","RPS14","RPS15","RPS15A","RPS16","RPS17","RPS18","RPS19","RPS20","RPS21","RPS24","RPS25","RPS26","RPS27","RPS27A","RPS27L","RPS28","RPS29","RPS3","RPS3A","RPS4X","RPS5","RPS6","RPS6KA1","RPS6KA2","RPS6KA3","RPS6KA4","RPS6KA5","RPS6KA6","RPS6KB1","RPS6KB2","RPS6KC1","RPS6KL1","RPS7","RPS8","RPS9","RPSA","TRPS1","UBB")
```
Identify which of those genes are in the seurat object.
```{r}
housekeepers.genes.OAC <- housekeepers[housekeepers %in% rownames(x= OAC)]
```
Calculate for each cell the percentage of genes that have come from the housekeeper list.
```{r}
OAC[["percent.housekeepers"]] <- PercentageFeatureSet(OAC, features = housekeepers.genes.OAC)
```
Then plot a histogram
```{r}
p2 <- ggplot(OAC@meta.data) + geom_histogram(aes(percent.housekeepers, fill = ..count..), bins = 1000) + scale_fill_viridis_c("Count", option = "plasma")+ xlab("Percentage Housekeeper Gene Content") + ylab("Cell Count")+ xlim(0,30) + ylim(0,100)

p2
```

Calculate Mitochondrial percentage
```{r}
OAC[["percent.mito"]] <- PercentageFeatureSet(OAC, pattern = "^MT-")
```
Calculate what percentage of the cells will be annotated as dying cells with cutoff value 25
```{r}
Dying.Cell.Cutoff <- 25
OAC@meta.data$Dying<- NULL
OAC@meta.data$Dying <- ifelse(OAC@meta.data$percent.mito >= Dying.Cell.Cutoff,1,0)
PercentageDying <- round((nrow(OAC@meta.data[OAC@meta.data$percent.mito >= Dying.Cell.Cutoff, ]))/(nrow(OAC@meta.data))*100, digits = 2)
print(c("Percentage of OAC cells annotated as dying cells is", PercentageDying))
```

Visualise with a Histogram
```{r}
p3 <- ggplot(OAC@meta.data) + geom_histogram(aes(percent.mito, fill = ..count..), bins = 1000) + scale_fill_viridis_c("Count", option = "plasma") + xlab("Percentage Mitochondrial Gene Content") + ylab("Cell Count")+ xlim(0,100) + ylim(0,175)

p3
```

```{r}
Idents(OAC) <- "sample.ident"
p4 <- VlnPlot(OAC, features = "percent.mito", pt.size = 0, sort = T)+ geom_hline(yintercept = c(Dying.Cell.Cutoff), color = c("black")) +geom_boxplot(width=0.1, fill = "gray90") + labs(x = "", y = "Percent Mitochondrial Content",  title = "Percent Mitochondrial Content per Cell") + theme(axis.title.x = element_text(color = "black"), axis.title.y=element_text(color= "black")) + NoLegend()

p4
```

Quick Tidy
```{r}
rm(PercentageDissociationAffected, DissociationCutoff, PercentageDying, Dying.Cell.Cutoff, dissociation, dissociation.genes.OAC)
```

```{r}
median <- median(OAC@meta.data$nFeature_RNA)
mean <- mean(OAC@meta.data$nFeature_RNA)

quantile.1 <- quantile(OAC@meta.data$nFeature_RNA, 1/4)
quantile.3 <- quantile(OAC@meta.data$nFeature_RNA, 3/4)

p5 <- VlnPlot(OAC, features = "nFeature_RNA", pt.size = 0, x.lab.rot = F) + geom_hline(yintercept = c(median, quantile.1, quantile.3)) + labs(x = "", y = "Counts",  title = "Genes per Cell OAC") + theme(axis.title.x = element_text(color = "black"), axis.title.y=element_text(color= "black")) + NoLegend()

p5
```

Add cell cycle scores to each cell
```{r}
cc.genes <- readLines(con = "~/Desktop/R/cell_cycle_genes.txt")
s.genes <- cc.genes[1:43]
g2m.genes <- cc.genes[44:98]
OAC <- CellCycleScoring(object = OAC, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)
head(OAC@meta.data)
```

```{r}
rm(cc.genes, s.genes, g2m.genes)
```

Use Seurat's integration analysis to correct for batch effect. Doesn't work well for so many samples and so we used sample ID and Sequencing batch as vars.to.regress in SCTransform instead.
```{r}
#OAC.List <- SplitObject(OAC, split.by = "orig.ident")
#for (i in 1:length(OAC.List)) {
#    OAC.List[[i]] <- SCTransform(OAC.List[[i]], verbose = FALSE)
#}
```

```{r}
#OAC.features <- SelectIntegrationFeatures(object.list = OAC.List, nfeatures = 3000)
#OAC.List <- PrepSCTIntegration(object.list = OAC.List, anchor.features = OAC.features, 
#    verbose = FALSE)
```

```{r}
#OAC.anchors <- FindIntegrationAnchors(object.list = OAC.List, normalization.method = "SCT", 
 #   anchor.features = OAC.features, verbose = FALSE,)
#OAC2 <- IntegrateData(anchorset = OAC.anchors, normalization.method = "SCT", 
 #   verbose = FALSE)
```

```{r}
OAC1 <- subset(OAC, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mito < 25)
```

```{r}
median1 <- median(OAC1@meta.data$nFeature_RNA)

mean1 <- mean(OAC1@meta.data$nFeature_RNA)

quantile1.1 <- quantile(OAC1@meta.data$nFeature_RNA, 1/4)
quantile1.3 <- quantile(OAC1@meta.data$nFeature_RNA, 3/4)

```
Set Cut off's for 4 nGene Groups based on median and quartiles
```{r}
OAC.25 <- WhichCells(OAC1,  expression = (quantile1.1+1) > nFeature_RNA & nFeature_RNA > -Inf)

OAC.50 <- WhichCells(OAC1,  expression = (median1+1) > nFeature_RNA & nFeature_RNA > quantile1.1)

OAC.75 <- WhichCells(OAC1,  expression = (quantile1.3+1) > nFeature_RNA & nFeature_RNA > median1)

OAC.100 <- WhichCells(OAC1, expression = Inf > nFeature_RNA & nFeature_RNA > quantile1.3)

OAC1 <-SetIdent(OAC1, cells =OAC.25, value = "1st")  
OAC1 <-SetIdent(OAC1, cells =OAC.50, value = "2nd")
OAC1 <-SetIdent(OAC1, cells =OAC.75, value = "3rd")
OAC1 <-SetIdent(OAC1, cells =OAC.100, value = "4th")
OAC1[["nGene.Groups"]] <- Idents(object = OAC1)
```

```{r}
rm(OAC.25, OAC.50, OAC.75, OAC.100)
```

Scaled on read depth, Mitotic phase and % mitochondrial RNA
```{r, include=FALSE}
OAC1 <- SCTransform(OAC1, vars.to.regress = c("percent.mito", "Phase", "nCount_RNA", "seq.batch", "orig.ident"), verbose = FALSE)
```

```{r}
OAC1 <- RunPCA(OAC1, features = VariableFeatures(object = OAC1), npcs = 150)
```

```{r}
p6 <- DimHeatmap(OAC1, dims = 1, cells = 100, balanced = TRUE, fast = F)

p6
```

```{r, fig.height = 6, fig.width = 8, fig.align = "center"}
OAC.Elbow <- ElbowPlot(OAC1, ndims = 150)

OAC.Elbow
```

```{r}
OAC1 <- FindNeighbors(OAC1, dims = 1:50)
```

```{r}
OAC1 <- FindClusters(OAC1, resolution = c(0.5:2.5))

```

```{r}
OAC1 <- RunUMAP(OAC1, dims = 1:50, n.neighbors = 50, n.epochs = 10000, spread=0.5, min_dist=0.1, seed.use = 111)
```

```{r}
UMAP <- DimPlot(OAC1, reduction = "umap", group.by = "seurat_clusters") + NoLegend()

UMAP
```

```{r}
OAC1 <- RunTSNE(OAC1, dims = 1:50, seed.use = 111, perplexity = 128, max_iter = 2000)
p6 <- DimPlot(OAC1, reduction = "tsne", group.by = "seurat_clusters")+NoLegend()
p6
```

```{r}
Idents(OAC1) <- "SCT_snn_res.0.5"
UMAP <- DimPlot(OAC1, reduction = "umap")
UMAP
```

```{r}
OAC1.Markers.0.5 <- FindAllMarkers(OAC1, min.pct = 0.25, logfc.threshold = 0.25, verbose = F, only.pos = T) 
```

```{r, fig.height=8, fig.width=8}
top10 <- OAC1.Markers.0.5 %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
DoHeatmap(OAC1, features = top10$gene, ) + NoLegend()
```

```{r}
idents.list = as.character(0:28)
Cell.Types = list("T.Cells","Fibroblasts","T.Cells","Fibroblasts","Cancer","Macrophages", "Squamous","Fibroblasts","Cancer","T.Cells","B.Cells","T.Cells","Endothelium","Squamous","Mast.Cells","Plasma.Cells","Plasma.Cells","Pericytes","Plasma.Cells","T.Cells","Plasma.Cells","T.Cells","T.Cells","Squamous","RBCs","Cancer","Plasma.Cells","Lymphatics","Gastric.Chief.Cells")

for (i in 1:length(idents.list)) {OAC1 <- SetIdent(OAC1, cells = WhichCells(OAC1, idents = idents.list[[i]]), value = Cell.Types[[i]])}
OAC1[["Cell.Types"]] <- Idents(OAC1)
DimPlot(OAC1, label = T, repel = T) + NoLegend()

FeaturePlot(OAC1,features = "PTPRC",min.cutoff = "q10",max.cutoff = "q90",pt.size = 1.25)

```

```{r}
Cell.Type.Markers <- FindAllMarkers(OAC1, only.pos = T, logfc.threshold = 0.8, min.pct = 0.5)
```

Checking the epithelial cell types squamous vs cancer and then subtypes of squamous epithelial cells.
```{r}
Epithelial <- subset(OAC1, idents = c("Cancer", "Squamous"))
Epithelial <- FindVariableFeatures(Epithelial, assay = "SCT")
plot1 <- VariableFeaturePlot(Epithelial)
top10 <- head(VariableFeatures(Epithelial), 10)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
CombinePlots(plots = list(plot1, plot2))
```

```{r}
Epithelial <- RunPCA(Epithelial, features = VariableFeatures(Epithelial))
```

```{r}
DimPlot(Epithelial, reduction = "pca")
```

```{r}
DimHeatmap(Epithelial, dims = 1, cells = 500, balanced = TRUE)
```

```{r}
DimHeatmap(Epithelial, dims = 1:9, cells = 500, balanced = TRUE)
```

```{r}
ElbowPlot(Epithelial)
```

```{r}
Epithelial <- FindNeighbors(Epithelial, dims = 1:15)
Epithelial <- FindClusters(Epithelial, resolution = 0.5:2.5)
```

```{r}
Idents(Epithelial) <- "SCT_snn_res.0.5"
Epithelial <- RunUMAP(Epithelial, dims = 1:15, n.epochs = 10000, min.dist = 0.001, seed.use = 111)
DimPlot(Epithelial, label = T) +NoLegend()
```

```{r}
Epithelial.Markers <- FindAllMarkers(Epithelial, only.pos = T)

top10 <- Epithelial.Markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
DoHeatmap(Epithelial, features = top10$gene) + NoLegend()
```


```{r}
idents.list = as.character(0:16)
Epithelial.Subtypes = list("Cancer","Cancer","Keratinocytes Outer","Basal Epithelial","Cancer","Stratified Keratinocytes","Cancer","Cancer","Cancer","Basal Epithelial","Immune Contaminated","Cancer","Stratified Keratinocytes","Fibroblast Contaminated","Basal Epithelial","Fibroblast Contaminated","Immune Contaminated")

for (i in 1:length(idents.list)) {Epithelial <- SetIdent(Epithelial, cells = WhichCells(Epithelial, idents = idents.list[[i]]), value = Epithelial.Subtypes[[i]])}
Epithelial[["Cell.Subtypes"]] <- Idents(object = Epithelial)
DimPlot(Epithelial, label = T, repel = T) + NoLegend()
```


```{r}
Epithelial.List <- list()
for (i in 1:length(idents.list)) {Epithelial.List[[i]] <-  WhichCells(Epithelial, idents = Epithelial.Subtypes[[i]])}
Idents(OAC1) <- OAC1[["Cell.Subtypes"]]
for (i in 1:length(idents.list)) {OAC1 <- SetIdent(OAC1, cells = Epithelial.List[[i]], value = Epithelial.Subtypes[[i]])}

DimPlot(OAC1, reduction = "umap", label = T, repel = T) + NoLegend()
```

```{r}
OAC1[["Cell.Subtypes"]] <- Idents(object = OAC1)
```
 
```{r}
Epithelial <- subset(Epithelial, idents= c("Cancer","Keratinocytes Outer","Basal Epithelial","Stratified Keratinocytes"))
DimPlot(Epithelial, label = T, repel = T) + NoLegend()
Epithelial.Markers.2 <- FindAllMarkers(Epithelial, only.pos = T)
```
```{r}
Epithelial.Markers.2 <- FindAllMarkers(Epithelial, only.pos = T)
top10 <- Epithelial.Markers.2 %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
Epithelial@active.ident <- factor(Epithelial@active.ident, levels = c("Basal Epithelial","Stratified Keratinocytes","Keratinocytes Outer", "Cancer"))
DoHeatmap(subset(Epithelial, downsample= 350), features = top10$gene)
```


```{r}
Epithelial.List <- list()
for (i in 1:length(idents.list)) {Epithelial.List[[i]] <-  WhichCells(Epithelial, idents = Epithelial.Subtypes[[i]])}
Idents(OAC1) <- OAC1[["Cell.Subtypes"]]
for (i in 1:length(idents.list)) {OAC1 <- SetIdent(OAC1, cells = Epithelial.List[[i]], value = Epithelial.Subtypes[[i]])}

DimPlot(OAC1, reduction = "umap", label = T, repel = T) + NoLegend()
```

```{r}
OAC1[["Cell.Subtypes"]] <- Idents(object = OAC1)
```
 

```{r}
Idents(OAC1) <- "Cell.Types"
DimPlot(OAC1, reduction = "umap", label = T) + NoLegend()

Immune.Cells <- SubsetData(OAC1, ident.use = c("T.Cells","Mast.Cells","Plasma.Cells", "B.Cells", "Macrophages"))
DimPlot(Immune.Cells, reduction = "umap", label = F,pt.size = 1.25,cols = c("#FEC12C","#5D9BD3","#A5A5A5","#4672C2", "#EC7F3D")) +NoLegend()

FeaturePlot(Immune.Cells,features = "MS4A1",min.cutoff = "q10",max.cutoff = "q90",pt.size = 1.25, cols = c("grey","red"))
FeaturePlot(Immune.Cells,features = "CD3D",min.cutoff = "q10",max.cutoff = "q90",pt.size = 1.25, cols = c("grey","red"))
FeaturePlot(Immune.Cells,features = "LYZ",min.cutoff = "q10",max.cutoff = "q90",pt.size = 1.25, cols = c("grey","red"))
FeaturePlot(Immune.Cells,features = "TPSAB1",min.cutoff = "q10",max.cutoff = "q90",pt.size = 1.25, cols = c("grey","red"))
FeaturePlot(Immune.Cells,features = "MZB1",min.cutoff = "q10",max.cutoff = "q90",pt.size = 1.25, cols = c("grey","red"))
FeaturePlot(Immune.Cells,features = "nCount_RNA",min.cutoff = "q10",max.cutoff = "q90",pt.size = 1.25,cols= c("grey","red"))
```

```{r}
OAC1.Markers.2.5 <- FindAllMarkers(OAC1, verbose = F, only.pos = T)
top10.62 <- OAC1.Markers.2.5 %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
```

```{r}
Idents(OAC1) <- "SCT_snn_res.0.5"

idents.list <- as.character(0:28)
cell.subset.list<- list("T.Cells","Connective.Tissue","T.Cells","Connective.Tissue","Cancer","Macrophages", "Squamous","Connective.Tissue","Cancer","T.Cells","B.Cells","T.Cells","Connective.Tissue","Squamous","Mast.Cells","Plasma.Cells","Plasma.Cells","Connective.Tissue","Plasma.Cells","T.Cells","Plasma.Cells","T.Cells","T.Cells","Squamous","RBCs","Cancer","Plasma.Cells","Connective.Tissue","Cancer")

for (i in 1:length(idents.list)) {OAC1 <- SetIdent(OAC1, cells = WhichCells(OAC1, idents = idents.list[[i]]), value = cell.subset.list[[i]])}
OAC1[["Subsets"]] <- Idents(object = OAC1)
DimPlot(OAC1)
```

```{r}
T.Cells <- SubsetData(OAC1, ident.use = "T.Cells")
```

```{r}
T.Cells <- FindVariableFeatures(T.Cells, assay = "SCT")
plot1 <- VariableFeaturePlot(T.Cells)
top10 <- head(VariableFeatures(T.Cells), 10)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
CombinePlots(plots = list(plot1, plot2))
```

```{r}
T.Cells <- RunPCA(T.Cells, features = VariableFeatures(T.Cells),npcs = 200)
```

```{r}
DimPlot(T.Cells, reduction = "pca")
```

```{r}
DimHeatmap(T.Cells, dims = 1, cells = 500, balanced = TRUE)
```

```{r}
DimHeatmap(T.Cells, dims = 1:9, cells = 500, balanced = TRUE)
```

```{r}
ElbowPlot(T.Cells,ndims = 150)
```


```{r}
T.Cells <- FindNeighbors(T.Cells, dims = 1:10)
T.Cells <- FindClusters(T.Cells, resolution = 0.5:2.5)
```


```{r}
ElbowPlot(T.Cells,ndims = 150) 
```

clustering with 64 pcs
```{r}

T.Cells.mod <- FindNeighbors(T.Cells, dims = 1:64,k.param = 20)
T.Cells.mod <- FindClusters(T.Cells.mod, resolution = seq(0.2,2,0.05))

T.Cells.mod <- RunUMAP(T.Cells.mod, dims = 1:64, n.neighbors = 1000, n.epochs =NULL, spread = 5, min_dist=0.5, seed.use = 42)
Idents(T.Cells.mod) <- "SCT_snn_res.2"
plot1 <- DimPlot(T.Cells.mod,label = T,pt.size = 1.25)
plot2 <- DimPlot(T.Cells.mod,label = T,pt.size = 1.25,group.by = "tumour.normal")
plot1+plot2
VlnPlot(T.Cells.mod,features = c("percent.mito"))
```
Build cluster tree and reorder cluster identities for visualisation and cluster filtering
```{r}
T.Cells.mod <- BuildClusterTree(T.Cells.mod,reorder = TRUE,reorder.numeric = TRUE)
PlotClusterTree(T.Cells.mod)
```

new cluster ids based on cluster tree
```{r}
plot1 <- DimPlot(T.Cells.mod,label = T,pt.size = 1.25)
plot2 <- DimPlot(T.Cells.mod,label = T,pt.size = 1.25,group.by = "tumour.normal")
plot1+plot2
```

marker identification
```{r}
T.Cells.mod.markers <- FindAllMarkers(T.Cells.mod, only.pos = T)
top2 <- T.Cells.mod.markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_logFC)
top5 <- T.Cells.mod.markers %>% group_by(cluster) %>% top_n(n = 5, wt = avg_logFC)
top10 <- T.Cells.mod.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
```

use this file to assign basic T cell subset assignment
```{r}
write.csv(T.Cells.mod.markers,file = "T.Cells.mod.tree.markers.csv",quote = F)

DoHeatmap(T.Cells.mod, features = top5$gene,assay = "SCT") + NoLegend() +
theme(text = element_text(size = 10,face="bold.italic"))
```

Average heatmap - identify T cells vs. contaminating cells
```{r}
T.Cells.mod.cluster.averages <- AverageExpression(T.Cells.mod, return.seurat = TRUE)

levels(T.Cells.mod.cluster.averages)


DoHeatmap(T.Cells.mod.cluster.averages, features = top5gene, size = 3, 
    draw.lines = F) +scale_fill_gradientn(colors = rev(c("#67001f","#b2182b","#d6604d","#f4a582","#fddbc7","#d1e5f0","#92c5de","#4393c3","#2166ac","#053061"))) +
theme(text = element_text(size = 12,face="bold.italic"))

DoHeatmap(T.Cells.mod.cluster.averages, features = c("CD3D","CD3E","CD3G","XCL1", "FCGR3A", "KLRD1", "KLRF1","CD8A", "CD8B","IL7R","CD4","MKI67","TOP2A"), size = 3, 
    draw.lines = F) +scale_fill_gradientn(colors = rev(c("#67001f","#b2182b","#d6604d","#f4a582","#fddbc7","#d1e5f0","#92c5de","#4393c3","#2166ac","#053061"))) +
theme(text = element_text(size = 12,face="bold.italic"))

DoHeatmap(T.Cells.mod.cluster.averages, features = unique(top20$gene), size = 3, 
    draw.lines = F) +scale_fill_gradientn(colors = rev(c("#67001f","#b2182b","#d6604d","#f4a582","#fddbc7","#d1e5f0","#92c5de","#4393c3","#2166ac","#053061"))) +
theme(text = element_text(size = 8,face="bold.italic"))
```

basic T cell subsets and removing cells
```{r}
Idents(object = T.Cells.mod) <- "tree.ident"
idents.list <- as.character(1:34)
cell.subset.list<- list("remove","remove","remove","remove","remove","remove","remove","CD8","remove","remove","CD8","CD4","CD4","remove","remove","CD4","cycling","remove","CD8","remove","CD8","CD8","CD8","CD8","CD8","CD4","NK","CD4","CD4","remove", "Treg","remove","CD4","CD4")
for (i in 1:length(idents.list)) {T.Cells.mod <- SetIdent(T.Cells.mod, cells = WhichCells(T.Cells.mod, idents = idents.list[[i]]), value = cell.subset.list[[i]])}
T.Cells.mod[["basic.Tcell.subsets"]] <- Idents(object = T.Cells.mod)

plot2<-DimPlot(T.Cells.mod,label = T,pt.size = 1.25,group.by = "tree.ident")
plot1<-DimPlot(T.Cells.mod,label = T,pt.size = 1.25,group.by = "basic.Tcell.subsets")
plot1+plot2

Idents(object = T.Cells.mod) <- "tree.ident"
T.Cells.mod.clean <-subset(T.Cells.mod, idents = c(8,11,12,13,16,17,19,21,22,23,24,25,26,27,28,29,31,33,34))
plot1<-DimPlot(T.Cells.mod.clean,label = T,pt.size = 1.25,group.by = "basic.Tcell.subsets")
plot2<-DimPlot(T.Cells.mod.clean,label = T,pt.size = 1.25,group.by = "tree.ident")
plot1+plot2
```

reclustering cleaned T cell dataset
```{r}
T.Cells.mod.clean.reclustered <- FindVariableFeatures(T.Cells.mod.clean, assay = "SCT")
T.Cells.mod.clean.reclustered <- RunPCA(T.Cells.mod.clean.reclustered, features = VariableFeatures(T.Cells.mod.clean.reclustered),npcs = 200)
ElbowPlot(T.Cells.mod.clean.reclustered,ndims = 100)

T.Cells.mod.clean.reclustered <- FindNeighbors(T.Cells.mod.clean, dims = 1:60,k.param = 20)#64
T.Cells.mod.clean.reclustered <- FindClusters(T.Cells.mod.clean.reclustered, resolution = seq(0.2,2,0.05))
T.Cells.mod.clean.reclustered <- RunUMAP(T.Cells.mod.clean.reclustered, dims = 1:60, n.neighbors = 350, n.epochs =5000, spread = 3, min_dist=1, seed.use = 42)#64 650

Idents(T.Cells.mod.clean.reclustered) <- "SCT_snn_res.0.6"
plot2 <- DimPlot(T.Cells.mod.clean.reclustered,label = T,pt.size = 2)
plot1 <- DimPlot(T.Cells.mod.clean.reclustered,label = T,pt.size = 2,group.by="basic.Tcell.subsets")
plot3 <- DimPlot(T.Cells.mod.clean.reclustered,label = T,pt.size = 2,group.by = "tumour.normal")
plot1+plot2+plot3

```

marker identification
```{r}
Idents(T.Cells.mod.clean.reclustered) <- "SCT_snn_res.0.6"
T.Cells.mod.clean.reclustered.markers <- FindAllMarkers(T.Cells.mod.clean.reclustered, only.pos = T)
top2 <- T.Cells.mod.clean.reclustered.markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_logFC)
top5 <- T.Cells.mod.clean.reclustered.markers %>% group_by(cluster) %>% top_n(n = 5, wt = avg_logFC)
top10 <- T.Cells.mod.clean.reclustered.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
top20 <- T.Cells.mod.clean.reclustered.markers %>% group_by(cluster) %>% top_n(n = 20, wt = avg_logFC)
write.csv(T.Cells.mod.clean.reclustered.markers,file = "T.Cells.mod.clean.reclustered.markers.csv",quote = F)

DoHeatmap(T.Cells.mod.clean.reclustered, features = top10$gene,assay = "SCT",) + NoLegend() +
theme(text = element_text(size = 10,face="bold.italic"))
```

more T cell subsets
```{r}
Idents(object = T.Cells.mod.clean.reclustered) <- "SCT_snn_res.0.6"
idents.list <- as.character(0:11)
cell.subset.list<- list("CD4.Naive","CD8.CCL5.Cytotoxic","Treg.IL2RA","CD8.CCL4.Cytotoxic","CD4.CD69.Activated","CD8.GNLY.Cytotoxic","CD4.CD161.memory","CD8.CXCL13.Exhausted","NK.XCL1","CD8.IFNG.Cytotoxic","Cycling.MKI67","gdTcell.TRDC")
for (i in 1:length(idents.list)) {T.Cells.mod.clean.reclustered <- SetIdent(T.Cells.mod.clean.reclustered, cells = WhichCells(T.Cells.mod.clean.reclustered, idents = idents.list[[i]]), value = cell.subset.list[[i]])}
T.Cells.mod.clean.reclustered[["more.Tcell.subsets"]] <- Idents(object = T.Cells.mod.clean.reclustered)

plot2<-DimPlot(T.Cells.mod.clean.reclustered,label = T,pt.size = 1.25,group.by = "more.Tcell.subsets")
plot1<-DimPlot(T.Cells.mod.clean.reclustered,label = T,pt.size = 1.25,group.by = "basic.Tcell.subsets")
plot1+plot2

levels(T.Cells.mod.clean.reclustered@active.ident)<-c("CD4.Naive","CD4.CD69.Activated","CD4.CD161.memory", "Treg.IL2RA" ,"CD8.CXCL13.Exhausted","CD8.CCL4.Cytotoxic","CD8.CCL5.Cytotoxic","CD8.IFNG.Cytotoxic","CD8.GNLY.Cytotoxic","NK.XCL1","gdTcell.TRDC","Cycling.MKI67")
levels(T.Cells.mod.clean.reclustered)

DimPlot(T.Cells.mod.clean.reclustered,label = T,pt.size = 2)
DimPlot(T.Cells.mod.clean.reclustered,label = F,pt.size = 2,group.by = "tumour.normal")
DimPlot(T.Cells.mod.clean.reclustered,label = F,pt.size = 2,group.by = "orig.patient")
```


```{r}
Idents(T.Cells.mod.clean.reclustered) <- "more.Tcell.subsets"
T.Cells.mod.clean.reclustered.cluster.averages <- AverageExpression(T.Cells.mod.clean.reclustered, return.seurat = TRUE)

levels(T.Cells.mod.clean.reclustered.cluster.averages)
levels(T.Cells.mod.clean.reclustered.cluster.averages)<-c("CD4.Naive","CD4.CD69.Activated","CD4.CD161.memory", "Treg.IL2RA" ,"CD8.CXCL13.Exhausted","CD8.CCL4.Cytotoxic","CD8.CCL5.Cytotoxic","CD8.IFNG.Cytotoxic","CD8.GNLY.Cytotoxic","NK.XCL1","gdTcell.TRDC","Cycling.MKI67")

DoHeatmap(T.Cells.mod.clean.reclustered.cluster.averages, features = c("CD3D","CD3E","CD3G","IL7R", "CD4","CD8A", "CD8B","TCF7","SELL", "LEF1", "CCR7","IL2RA", "FOXP3", "IKZF2","IL2", "GZMA", "GNLY", "PRF1", "GZMB", "GZMK", "IFNG", "NKG7","LAG3", "TIGIT", "PDCD1", "HAVCR2","FGFBP2", "XCL1","XCL2", "FCGR3A", "KLRD1", "KLRF1","TRDC", "TRGC2", "TRGC1","MKI67", "TOP2A"), size = 3, 
    draw.lines = F) +scale_fill_gradientn(colors = rev(c("#67001f","#b2182b","#d6604d","#f4a582","#fddbc7","#d1e5f0","#92c5de","#4393c3","#2166ac","#053061"))) +
theme(text = element_text(size = 12,face="bold.italic"))

DoHeatmap(T.Cells.mod.clean.reclustered.cluster.averages, features = c("IL7R","CD69","KLRB1","IL2RA","CXCL13","CCL4","CCL5","IFNG","GNLY","XCL1","TRDC","MKI67"), size = 3, 
    draw.lines = F) +scale_fill_gradientn(colors = rev(c("#67001f","#b2182b","#d6604d","#f4a582","#fddbc7","#d1e5f0","#92c5de","#4393c3","#2166ac","#053061"))) +
theme(text = element_text(size = 12,face="bold.italic"))

DoHeatmap(T.Cells.mod.clean.reclustered.cluster.averages, features = c("CD69","ITGAE", "PDCD1","CD274","PDCD1LG2","CD28","CTLA4","ICOS","BTLA","LAG3","TNFRSF9","TNFRSF4","CD27","CD40LG","HAVCR2","GZMA","GZMB","GZMM","GZMH","GZMK"), size = 3, 
    draw.lines = F) +scale_fill_gradientn(colors = rev(c("#67001f","#b2182b","#d6604d","#f4a582","#fddbc7","#d1e5f0","#92c5de","#4393c3","#2166ac","#053061"))) +
theme(text = element_text(size = 12,face="bold.italic"))

DoHeatmap(T.Cells.mod.clean.reclustered.cluster.averages, features = unique(top20$gene), size = 3, 
    draw.lines = F) +scale_fill_gradientn(colors = rev(c("#67001f","#b2182b","#d6604d","#f4a582","#fddbc7","#d1e5f0","#92c5de","#4393c3","#2166ac","#053061"))) +
theme(text = element_text(size = 8,face="bold.italic"))
```


```{r}
table<- as.data.frame(table(T.Cells.mod.clean.reclustered@meta.data$more.Tcell.subsets,T.Cells.mod.clean.reclustered@meta.data$orig.patient,T.Cells.mod.clean.reclustered$T.stage,T.Cells.mod.clean.reclustered$tumour.normal))
```
chi-squared tests
```{r}
d<- table(T.Cells.mod.clean.reclustered@meta.data$more.Tcell.subsets,T.Cells.mod.clean.reclustered$tumour.normal)
chisq.Tcells.tumour.normal <-apply(d, 1, chisq.test)

balloonplot(t(d), main ="T cells", xlab ="test", ylab="",
            label = FALSE, show.margins = FALSE)

mosaicplot(d, shade = TRUE, las=2,
           main = "T cells")
```
plot out pearsons residuals
```{r}
chisq <-chisq.test(d)
chisq$observed
install.packages("corrplot")

col2 <- colorRampPalette(c("#003c30", "#01665e" ,"#35978f" ,"#80cdc1","#c7eae5" ,"white", "#f6e8c3" ,"#dfc27d", "#bf812d", "#8c510a", "#543005"))
 corrplot(chisq$residuals, is.cor = FALSE,tl.col = "black",cl.align.text="l",cl.ratio=0.9,cl.cex=1,method = "circle",col = col2(100),outline=T,bg="#f0f0f0")
```
as percentages
```{r}
contrib <- 100*chisq$residuals^2/chisq$statistic
round(contrib, 3)
corrplot(contrib, is.cor = FALSE,tl.col = "black",cl.align.text="l",cl.ratio=0.9,cl.cex=1)
chisq$p.value

```

order patients by pT Stage
```{r}

table$Var2 <- factor(table$Var2, levels=c("OAC2811","OAC3010","OAC22","OAC1210","OAC1212","OAC1411","OAC1512","OAC1710","OAC233","OAC276","OAC44","OAC46","OAC55","OAC67","OAC174","OAC307","OAC132","OAC166","OAC2610","OAC2210","OAC263"))
table$Var1 <- factor(table$Var1, levels=c("CD4.Naive","CD4.CD69.Activated","CD4.CD161.memory", "Treg.IL2RA" ,"CD8.CXCL13.Exhausted","CD8.CCL4.Cytotoxic","CD8.CCL5.Cytotoxic","CD8.IFNG.Cytotoxic","CD8.GNLY.Cytotoxic","NK.XCL1","gdTcell.TRDC","Cycling.MKI67"))


hue_pal()(12) # c("#F8766D" ,"#B79F00" ,"#00BA38", "#00BFC4", "#619CFF", "#F564E3")

#T cell figure panel C
ggplot(table, aes(x = Var3, y = Freq, fill = Var1)) +
    geom_bar(position = "stack",stat = 'identity') +
    xlab("pT Stage") +
    ylab("Count of Myeloid cells") +
    theme(legend.title=element_blank()) + 
    scale_fill_manual(values=c("#F8766D", "#DE8C00", "#B79F00", "#7CAE00", "#00BA38", "#00C08B", "#00BFC4", "#00B4F0", "#619CFF", "#C77CFF","#F564E3", "#FF64B0")) + 
  facet_grid(Var4 ~ .)+ 
    theme(axis.text.x = element_text(angle = 90))+
    theme(panel.background = element_blank())+ 
    theme(axis.line = element_line(colour = "black"))

ggplot(table, aes(x = Var3, y = Freq, fill = Var1)) +
    geom_bar(position = "fill",stat = 'identity') +
    xlab("pT Stage") +
    ylab("Proportion of Myeloid cells") +
    theme(legend.title=element_blank()) + 
    scale_fill_manual(values=c("#F8766D", "#DE8C00", "#B79F00", "#7CAE00", "#00BA38", "#00C08B", "#00BFC4", "#00B4F0", "#619CFF", "#C77CFF","#F564E3", "#FF64B0")) + 
  facet_grid(Var4 ~ .)+ 
    theme(axis.text.x = element_text(angle = 90))+
    theme(panel.background = element_blank())+ 
    theme(axis.line = element_line(colour = "black"))

ggplot(table, aes(x = Var2, y = Freq, fill = Var1)) +
    geom_bar(position = "stack",stat = 'identity') +
    xlab("patient") +
    ylab("Count of Myeloid cells") +
    theme(legend.title=element_blank()) + 
    scale_fill_manual(values=c("#F8766D", "#DE8C00", "#B79F00", "#7CAE00", "#00BA38", "#00C08B", "#00BFC4", "#00B4F0", "#619CFF", "#C77CFF","#F564E3", "#FF64B0")) + 
    facet_grid(Var4 ~ .) + 
    theme(axis.text.x = element_text(angle = 90))+
    theme(panel.background = element_blank())+ 
    theme(axis.line = element_line(colour = "black"))

ggplot(table, aes(x = Var2, y = Freq, fill = Var1)) +
    geom_bar(position = "fill",stat = 'identity') +
    xlab("patient") +
    ylab("Proportion of Myeloid cells") +
    theme(legend.title=element_blank()) + 
    scale_fill_manual(values=c("#F8766D", "#DE8C00", "#B79F00", "#7CAE00", "#00BA38", "#00C08B", "#00BFC4", "#00B4F0", "#619CFF", "#C77CFF","#F564E3", "#FF64B0")) + 
  facet_grid(Var4 ~ .)+ 
    theme(axis.text.x = element_text(angle = 90))+
    theme(panel.background = element_blank())+ 
    theme(axis.line = element_line(colour = "black"))
```

```{r}
#T-cell scoring

T.cell.scores <- read.table(file ="Tcell.score.txt", sep = "\t", blank.lines.skip = TRUE, header = T)
#vector of hallmark gene set names
path_names <- colnames(T.cell.scores)
# test version
#sig_names_short <- sig_names[1:5]

for (i in path_names) {
  cd_features <- assign(i, as.vector(T.cell.scores[i]))
  cd_features <- na.omit(cd_features)
  T.Cells.mod.clean.reclustered <- AddModuleScore(
    object = T.Cells.mod.clean.reclustered,
    nbin=10,
    features = cd_features,
    ctrl = 100,
    name = i,
    search = TRUE)
  colnames(T.Cells.mod.clean.reclustered@meta.data) <-
    gsub(x = colnames(T.Cells.mod.clean.reclustered@meta.data)
         , pattern = paste0(i,1)
         , replacement = i
    )
}

levels(T.Cells.mod.clean.reclustered)

levels(T.Cells.mod.clean.reclustered)<-c("CD4.Naive","CD4.CD69.Activated","CD4.CD161.memory", "Treg.IL2RA" ,"CD8.CXCL13.Exhausted","CD8.CCL4.Cytotoxic","CD8.CCL5.Cytotoxic","CD8.IFNG.Cytotoxic","CD8.GNLY.Cytotoxic","NK.XCL1","gdTcell.TRDC","Cycling.MKI67")

FeaturePlot(T.Cells.mod.clean.reclustered,features = c("T.score",	"CD8.score",	"CD4.score",	"Treg.score",	"NK.score","gamma.delta.T.score"),cols = c("light gray","red"),min.cutoff = "q10",max.cutoff = "q90",pt.size = 1.5)

DotPlot(T.Cells.mod.clean.reclustered,features=c("MKI67","gamma.delta.T.score","NK.score","Exhausted.score","Cytotoxic.score","Treg.score","Naive.score"),scale = T,col.min = -0.25,col.max = 1,dot.scale = 10) +
theme(axis.text.x = element_text(angle = 90))

p <- DotPlot(T.Cells.mod.clean.reclustered,features=c("MKI67","gamma.delta.T.score","NK.score","Exhausted.score","Cytotoxic.score","Treg.score","Naive.score"),scale = T,col.min = -0.25,col.max = 1,dot.scale = 10) +
theme(axis.text.x = element_text(angle = 90))+ NoLegend()

DotPlot(T.Cells.mod.clean.reclustered,features=c("Treg","CD4.NV.CM.resting","CD4.CD8.resting","IFN.Response","Proliferation","CD8.Cytotoxic","CD8.Cytokine"),scale = T,col.min = -1,col.max = 1,dot.scale = 10) +
theme(axis.text.x = element_text(angle = 90))

ggsave('4.png', p, height = 8, width = 4)

FeaturePlot(T.Cells.mod.clean.reclustered,features = c("Naive.score",	"Exhausted.score",	"Cytotoxic.score"),cols = c("light gray","red"),min.cutoff = "q10",max.cutoff = "q90",pt.size = 1.5)

VlnPlot(T.Cells.mod.clean.reclustered,features = c("Cytotoxic.score"),pt.size = 0.1,split.by = "tumour.normal")

VlnPlot(T.Cells.mod.clean.reclustered,features = c("Naive.score",	"Exhausted.score",	"Cytotoxic.score","KLRB1","CD69"),pt.size = 0.1)

VlnPlot(T.Cells.mod.clean.reclustered,features = c("CCR7"),pt.size = 0.1)

DotPlot(T.Cells.mod.clean.reclustered,features=rev(c("PDCD1","CD274","PDCD1LG2","CD28","CTLA4","ICOS","BTLA","LAG3","TNFRSF9","TNFRSF4", "CD27","CD40LG","HAVCR2")),scale = T,,dot.scale = 10) +
theme(axis.text.x = element_text(angle = 90))

#Tcell activation signatures https://www.nature.com/articles/s41467-019-12464-3#MOESM7
T.cell.scores <- read.table(file ="resting.activated.Tcell.responses.txt", sep = "\t", blank.lines.skip = TRUE, header = T)
#vector of hallmark gene set names
path_names <- colnames(T.cell.scores)
# test version
#sig_names_short <- sig_names[1:5]

for (i in path_names) {
  cd_features <- assign(i, as.vector(T.cell.scores[i]))
  cd_features <- na.omit(cd_features)
  T.Cells.mod.clean.reclustered <- AddModuleScore(
    object = T.Cells.mod.clean.reclustered,
    nbin=10,
    features = cd_features,
    ctrl = 100,
    name = i,
    search = TRUE)
  colnames(T.Cells.mod.clean.reclustered@meta.data) <-
    gsub(x = colnames(T.Cells.mod.clean.reclustered@meta.data)
         , pattern = paste0(i,1)
         , replacement = i
    )
}

FeaturePlot(T.Cells.mod.clean.reclustered,features = c("Treg","CD4.NV.CM.resting","CD4.CD8.resting","IFN.Response","Proliferation","CD8.Cytotoxic","CD8.Cytokine" ),cols = c("light gray","red"),min.cutoff = "q10",max.cutoff = "q90",pt.size = 1.5)

VlnPlot(T.Cells.mod.clean.reclustered,features = c("Treg","CD4.NV.CM.resting","CD4.CD8.resting","IFN.Response","Proliferation","CD8.Cytotoxic","CD8.Cytokine"),pt.size = 0.1)

```

```{r}
Idents(T.Cells.mod.clean.reclustered) <- "simple.Tcell.subsets"

T.Cells.mod.clean.reclustered.cluster.averages <- AverageExpression(T.Cells.mod.clean.reclustered, return.seurat = TRUE)

levels(T.Cells.mod.clean.reclustered.cluster.averages)
levels(T.Cells.mod.clean.reclustered.cluster.averages)<-c("CD4","CD8","NK","Treg","gdTcell","cycling.cells")
levels(T.Cells.mod.clean.reclustered.cluster.averages)

DoHeatmap(T.Cells.mod.clean.reclustered.cluster.averages, features = c("CD3D","CD3E","IL7R", "CD4","CD8A", "CD8B","XCL1", "FCGR3A", "KLRD1", "KLRF1","IL2RA", "FOXP3", "IKZF2","TRDC", "TRGC2", "TRGC1","MKI67","TOP2A"), size = 3, 
    draw.lines = F) +scale_fill_gradientn(colors = rev(c("#67001f","#b2182b","#d6604d","#f4a582","#fddbc7","#d1e5f0","#92c5de","#4393c3","#2166ac","#053061"))) +
theme(text = element_text(size = 12,face="bold.italic"))


T.cell.scores <- read.table(file ="Tcell.score.txt", sep = "\t", blank.lines.skip = TRUE, header = T)
#vector of hallmark gene set names
path_names <- colnames(T.cell.scores)
# test version
#sig_names_short <- sig_names[1:5]

for (i in path_names) {
  cd_features <- assign(i, as.vector(T.cell.scores[i]))
  cd_features <- na.omit(cd_features)
  T.Cells.mod.clean.reclustered.cluster.averages <- AddModuleScore(
    object = T.Cells.mod.clean.reclustered.cluster.averages,
    nbin=10,
    features = cd_features,
    ctrl = 100,
    name = i,
    search = TRUE)
  colnames(T.Cells.mod.clean.reclustered.cluster.averages@meta.data) <-
    gsub(x = colnames(T.Cells.mod.clean.reclustered.cluster.averages@meta.data)
         , pattern = paste0(i,1)
         , replacement = i
    )
}

DotPlot(T.Cells.mod.clean.reclustered.cluster.averages,features=c("MKI67","gamma.delta.T.score","NK.score","Exhausted.score","Cytotoxic.score","Treg.score","Naive.score","CD8.score","CD4.score","T.score"),scale = T,col.min = -1,col.max = 1) +
theme(axis.text.x = element_text(angle = 90))

DotPlot(T.Cells.mod.clean.reclustered.cluster.averages,features=c("MKI67","gamma.delta.T.score","NK.score","Exhausted.score","Cytotoxic.score","Treg.score","Naive.score","CD8.score","CD4.score","T.score"),scale = T,col.min = -1,col.max = 1) +
theme(axis.text.x = element_text(angle = 90))

```

```{r}
idents.list <- as.character(T.Cells.mod.clean.reclustered@meta.data$new.Tcell.subsets)

T.Cells.mod.clean.reclustered.cluster.averages <- AverageExpression(T.Cells.mod.clean.reclustered, return.seurat = TRUE)

levels(T.Cells.mod.clean.reclustered.cluster.averages)
levels(T.Cells.mod.clean.reclustered.cluster.averages)<-c("CD4.Naive","CD4.CCR7.Naive","CD4.TTN","CD4.TRBC1","CD4.IL2RA.Treg","CD4.LTB.Treg","CD4.CD161.memory","CD4.CRTAM.cytotoxic","CD8.CCL5.Cytotoxic","CD8.IFNG.Cytotoxic","CD8.CXCL13.Exhausted","CD8.CCL4L2.NKT","CD8.GNLY.NKT","CD3neg.XCL1.NK","gdTcell","cycling.cells")
levels(T.Cells.mod.clean.cluster.averages)

DoHeatmap(T.Cells.mod.clean.cluster.averages, features = c("CD3D","CD3E","CD3G","XCL1", "FCGR3A", "KLRD1", "KLRF1","CD8A", "CD8B","IL7R", "CD4","TCF7", "SELL", "LEF1", "CCR7","LAG3", "TIGIT", "PDCD1", "HAVCR2","IL2", "GZMA", "GNLY", "PRF1", "GZMB", "GZMK", "IFNG", "NKG7","IL2RA", "FOXP3", "IKZF2", "TGFB1", "TGFB3", "TGFBI", "TGFBR1","MAF", "CXCL13", "CXCR5", "PDCD1","IRF4", "CREM", "NR4A2","STAT4", "IFNG", "IL12RB2","GATA3", "STAT6", "IL4","TRDC", "TRGC2", "TRGC1"), size = 3, 
    draw.lines = F) +scale_fill_gradientn(colors = rev(c("#67001f","#b2182b","#d6604d","#f4a582","#fddbc7","#d1e5f0","#92c5de","#4393c3","#2166ac","#053061"))) +
theme(text = element_text(size = 12,face="bold.italic"))


table<- as.data.frame(table(T.Cells.mod.clean.reclustered@meta.data$new.Tcell.subsets,T.Cells.mod.clean.reclustered@meta.data$orig.patient))
prop <- as.data.frame(prop.table(x = table(T.Cells.mod.clean.reclustered@meta.data$new.Tcell.subsets,T.Cells.mod.clean.reclustered@meta.data$orig.patient), margin = 2))
prop
# create your own color palette (36 colors) based on `seedcolors`
P19 = createPalette(19,  c("#ff0000", "#00ff00", "#0000ff"))
swatch(P19)

barplot(prop, xlab='orig.patient',ylab='Proportions',main="Proportions of T cells",col=P16,legend=rownames(prop), args.legend=list(x=ncol(prop) + 4,y=max(colSums(prop)),bty = "n"))

barplot(prop, xlab='orig.patient',ylab='Proportions',main="Proportions of T cells",
col=P16)

ggplot(table, aes(x = Var2, y = Freq, fill = Var1)) +
    geom_bar(stat = 'identity') +
    xlab("patient") +
    ylab("Count of T cells") +
    theme(legend.title=element_blank())


ggplot(prop, aes(x = Var2, y = Freq, fill = Var1)) +
    geom_bar(stat = 'identity') +
    xlab("patient") +
    ylab("Proportion of T cells") +
    theme(legend.title=element_blank())


FeaturePlot(T.Cells.mod.clean.reclustered,features = c("FGFBP2", "CD8A","CD4","IL7R","FOXP3", "LAG3","TIGIT","MKI67"),min.cutoff = "q10",max.cutoff = "q90" ,cols = c("gold","black"))

FeaturePlot(T.Cells.mod.clean.reclustered,features = c("T.score",	"NK.score",	"CD8.score",	"CD4.score",	"Naive.score",	"Exhausted.score",	"Cytotoxic.score",	"Treg.score",	"T.follicular.helper.score",	"T.helper.17.score",	"T.helper.1.score",	"T.helper.2.score",	"gamma.delta.T.score"),min.cutoff = "q10",max.cutoff = "q90" ,cols = c("grey","red"))

VlnPlot(T.Cells.mod.clean.reclustered,features = c("T.score",	"NK.score",	"CD8.score",	"CD4.score",	"Naive.score",	"Exhausted.score",	"Cytotoxic.score",	"Treg.score",	"T.follicular.helper.score",	"T.helper.17.score",	"T.helper.1.score",	"T.helper.2.score",	"gamma.delta.T.score"),pt.size = 0.1,group.by = "new.Tcell.subsets" ,split.by = "tumour.normal")

VlnPlot(T.Cells.mod.clean.reclustered,features = c("Naive.score",	"Exhausted.score",	"Cytotoxic.score",	"Treg.score"),pt.size = 0.1,group.by = "new.Tcell.subsets")


###Stacked vlnplots-https://divingintogeneticsandgenomics.rbind.io/post/stacked-violin-plot-for-visualizing-single-cell-data-in-seurat/
library(Seurat)
library(patchwork)
library(ggplot2)

## remove the x-axis text and tick
## plot.margin to adjust the white space between each plot.
## ... pass any arguments to VlnPlot in Seurat
modify_vlnplot<- function(obj, 
                          feature, 
                          pt.size = 0, 
                          plot.margin = unit(c(-0.75, 0, -0.75, 0), "cm"),
                          ...) {
  p<- VlnPlot(obj, features = feature, pt.size = pt.size, ... )  + 
    xlab("") + ylab(feature) + ggtitle("") + 
    theme(legend.position = "none", 
          axis.text.x = element_blank(), 
          axis.ticks.x = element_blank(), 
          axis.title.y = element_text(size = rel(1), angle = 0), 
          axis.text.y = element_text(size = rel(1)), 
          plot.margin = plot.margin ) 
  return(p)
}

## extract the max value of the y axis
extract_max<- function(p){
  ymax<- max(ggplot_build(p)$layout$panel_scales_y[[1]]$range$range)
  return(ceiling(ymax))
}


## main function
StackedVlnPlot<- function(obj, features,
                          pt.size = 0, 
                          plot.margin = unit(c(-0.75, 0, -0.75, 0), "cm"),
                          ...) {
  
  plot_list<- purrr::map(features, function(x) modify_vlnplot(obj = obj,feature = x, ...))
  
  # Add back x-axis title to bottom plot. patchwork is going to support this?
  plot_list[[length(plot_list)]]<- plot_list[[length(plot_list)]] +
    theme(axis.text.x=element_text(), axis.ticks.x = element_line())
  
  # change the y-axis tick to only max value 
  ymaxs<- purrr::map_dbl(plot_list, extract_max)
  plot_list<- purrr::map2(plot_list, ymaxs, function(x,y) x + 
                            scale_y_continuous(breaks = c(y)) + 
                            expand_limits(y = y))

  p<- patchwork::wrap_plots(plotlist = plot_list, ncol = 1)
  return(p)
}


T.cell.scores <- read.table(file ="Tcell.score.txt", sep = "\t", blank.lines.skip = TRUE, header = T)
#vector of hallmark gene set names
path_names <- colnames(T.cell.scores)
# test version
#sig_names_short <- sig_names[1:5]

for (i in path_names) {
  cd_features <- assign(i, as.vector(T.cell.scores[i]))
  cd_features <- na.omit(cd_features)
  T.Cells.mod.clean.reclustered <- AddModuleScore(
    object = T.Cells.mod.clean.reclustered,
    nbin=10,
    features = cd_features,
    ctrl = 100,
    name = i,
    search = TRUE)
  colnames(T.Cells.mod.clean.reclustered@meta.data) <-
    gsub(x = colnames(T.Cells.mod.clean.reclustered@meta.data)
         , pattern = paste0(i,1)
         , replacement = i
    )
}


T.cell.scores <- read.table(file ="Tcell.score.txt", sep = "\t", blank.lines.skip = TRUE, header = T)
#vector of hallmark gene set names
path_names <- colnames(T.cell.scores)
# test version
#sig_names_short <- sig_names[1:5]

for (i in path_names) {
  cd_features <- assign(i, as.vector(T.cell.scores[i]))
  cd_features <- na.omit(cd_features)
  T.Cells.mod.clean.cluster.averages <- AddModuleScore(
    object = T.Cells.mod.clean.cluster.averages,
    nbin=10,
    features = cd_features,
    ctrl = 100,
    name = i,
    search = TRUE)
  colnames(T.Cells.mod.clean.cluster.averages@meta.data) <-
    gsub(x = colnames(T.Cells.mod.clean.cluster.averages@meta.data)
         , pattern = paste0(i,1)
         , replacement = i
    )
}

VlnPlot(T.Cells.mod.clean.cluster.averages,features = c("Naive.score",	"Exhausted.score",	"Cytotoxic.score",	"Treg.score"),pt.size = 0.1)

levels(T.Cells.mod.clean.cluster.averages)<-c("CD4.Naive","CD4.CCR7.Naive","CD4.TTN","CD4.TRBC1","CD4.IL2RA.Treg","CD4.LTB.Treg","CD4.CD161.memory","CD4.CRTAM.cytotoxic","CD8.CCL5.Cytotoxic","CD8.IFNG.Cytotoxic","CD8.CXCL13.Exhausted","CD8.CCL4L2.NKT","CD8.GNLY.NKT","CD3neg.XCL1.NK","gdTcell","cycling.cells")

DotPlot(T.Cells.mod.clean.cluster.averages,features=c("MKI67","gamma.delta.T.score","NK.score","Exhausted.score","Cytotoxic.score","Treg.score","Naive.score","CD8.score","CD4.score","T.score"),scale = T,col.min = -1,col.max = 1) +
theme(axis.text.x = element_text(angle = 90))

StackedVlnPlot(obj = T.Cells.mod.clean.reclustered, features = c("T.score",	"CD8.score",	"CD4.score",	"Treg.score",	"NK.score","gamma.delta.T.score"))

StackedVlnPlot(obj = T.Cells.mod.clean.reclustered, features=c( "CCR7","PDCD1","CD28","CTLA4","ICOS","BTLA","LAG3","TNFRSF9","CD27","CD40LG","HAVCR2","GZMA","GZMB","GZMM","GZMH","GZMK"))

colnames(T.Cells.mod.clean.reclustered@meta.data)

T.Cells.mod.clean.reclustered[["T.helper.2.score"]] <- NULL
T.Cells.mod.clean.reclustered[["gamma.delta.T.score"]] <- NULL


columns.to.remove <- c("T.score","NK.score"                      
,"CD8.score" ,                    "CD4.score"                     
,"Naive.score",                    "Exhausted.score"               
,"Cytotoxic.score",                "Treg.score"                    
,"T.follicular.helper.score",      "T.helper.17.score"             
,"T.helper.1.score" ,              "T.helper..2.score"             
,"gamma.delta.T.score",            "T.score.1"                     
,"NK.score.1"          ,           "CD8.score.1"                   
,"CD4.score.1"          ,          "Naive.score.1"                 
,"Exhausted.score.1"     ,         "Cytotoxic.score.1"             
,"Treg.score.1"           ,        "T.follicular.helper.score.1"   
,"T.helper.17.score.1"     ,       "T.helper.1.score.1"            
,"T.helper.2.score"         ,      "gamma.delta.T.score.1")
for(i in columns.to.remove) {
  T.Cells.mod.clean.reclustered[[i]] <- NULL
}

columns.to.remove <- c("T.cells.CD8.1",                  "T.cells.CD4.naive.1"          
,"T.cells.CD4.memory.resting",     "T.cells.CD4.memory.activated" 
,"T.cells.follicular.helper",      "T.cells.regulatory"          
,"T.cells.gamma.delta",            "NK.cells.resting"             
,"NK.cells.activated",             "T.helper.2.score"             
,"gamma.delta.T.score")
for(i in columns.to.remove) {
  T.Cells.mod.clean.reclustered[[i]] <- NULL
}

```


```{r}
Idents(OAC1) <- "Subsets"

DimPlot(OAC1)
```

```{r}
Cancer <- subset(OAC1, idents = "Cancer")
Cancer <- FindVariableFeatures(Cancer, assay = "SCT")
plot1 <- VariableFeaturePlot(Cancer)
top10 <- head(VariableFeatures(Cancer), 10)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
CombinePlots(plots = list(plot1, plot2))
```

```{r}
Cancer <- RunPCA(Cancer, features = VariableFeatures(Cancer))

DimPlot(Cancer, reduction = "pca")
```

```{r}
DimHeatmap(Cancer, dims = 1, cells = 500, balanced = TRUE)
```

```{r}
DimHeatmap(Cancer, dims = 1:9, cells = 500, balanced = TRUE)
```

```{r}
ElbowPlot(Cancer)
```

```{r}
Cancer <- FindNeighbors(Cancer, dims = 1:15)
Cancer <- FindClusters(Cancer, resolution = 0.5:2.5)
```

```{r}
Idents(Cancer) <- "SCT_snn_res.0.5"
Cancer <- RunUMAP(Cancer, dims = 1:10, seed.use = 111)
DimPlot(Cancer, label = T) +NoLegend()
```

```{r}
Cancer.Markers <- FindAllMarkers(Cancer, only.pos = T)
Cancer.Markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_logFC)

top10 <- Cancer.Markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
DoHeatmap(Cancer, features = top10$gene) + NoLegend()
```
Pruning the cancer cell clusters
```{r}
OSM.Cells <- WhichCells(Cancer, idents = "11")
Contaminated <- WhichCells(Cancer, idents = "10")
snRNA.Cancer <- WhichCells(Cancer, idents = "9")
Parietal.Cells <- WhichCells(Cancer, idents = "12")
```

```{r}
Idents(Cancer) <- "tumour.normal"
Cancer <- subset(Cancer, idents = "Tumour")
```

```{r}
Idents(Cancer) <- "SCT_snn_res.0.5"
Cancer <- subset(Cancer, idents = c("12", "11", "10", "9"), invert = T)  
```

Removing cancer samples with few cells
```{r}
Idents(Cancer) <- "orig.patient"
Cancer <- subset(Cancer, idents = c("OAC1411", "OAC1512", "OAC2610", "OAC263", "OAC2811"), invert = T, downsample = 150, seed =111)  
```

```{r}
Cancer <- FindVariableFeatures(Cancer, assay = "SCT")
plot1 <- VariableFeaturePlot(Cancer)
top10 <- head(VariableFeatures(Cancer), 10)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
CombinePlots(plots = list(plot1, plot2))
```

```{r}
Cancer <- RunPCA(Cancer, features = VariableFeatures(Cancer))
```

```{r}
DimPlot(Cancer, reduction = "pca")
```

```{r}
DimHeatmap(Cancer, dims = 1, cells = 500, balanced = TRUE)
```

```{r}
DimHeatmap(Cancer, dims = 1:9, cells = 500, balanced = TRUE)
```

```{r}
ElbowPlot(Cancer)
```

```{r}
Cancer <- FindNeighbors(Cancer, dims = 1:10)
Cancer <- FindClusters(Cancer, resolution = 0.5:2.5)
```

```{r}
Idents(Cancer) <- "SCT_snn_res.0.5"
Cancer <- RunUMAP(Cancer, dims = 1:10, seed.use = 111)
DimPlot(Cancer, label = T) +NoLegend()
```

```{r}
Cancer.Downsampled.Markers <- FindAllMarkers(Cancer, only.pos = T)
Cancer.Downsampled.Markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_logFC)

top10 <- Cancer.Downsampled.Markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
DoHeatmap(Cancer, features = top10$gene) + NoLegend()
```

Now Connective Tissue

```{r}
Connective.Tissue <- subset(OAC1, idents = "Connective.Tissue")
Connective.Tissue <- FindVariableFeatures(Connective.Tissue, assay = "SCT")
plot1 <- VariableFeaturePlot(Connective.Tissue)
top10 <- head(VariableFeatures(Connective.Tissue), 10)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
CombinePlots(plots = list(plot1, plot2))
```

```{r}
Connective.Tissue <- RunPCA(Connective.Tissue, features = VariableFeatures(Connective.Tissue))
```

```{r}
DimPlot(Connective.Tissue, reduction = "pca")
```

```{r}
DimHeatmap(Connective.Tissue, dims = 1, cells = 500, balanced = TRUE)
```

```{r}
DimHeatmap(Connective.Tissue, dims = 1:9, cells = 500, balanced = TRUE)
```

```{r}
ElbowPlot(Connective.Tissue)
```

```{r}
Connective.Tissue <- FindNeighbors(Connective.Tissue, dims = 1:15)
Connective.Tissue <- FindClusters(Connective.Tissue, resolution = 0.5:2.5)
```

```{r}
Idents(Connective.Tissue) <- "SCT_snn_res.0.5"
Connective.Tissue <- RunUMAP(Connective.Tissue, dims = 1:15, seed.use = 111)
DimPlot(Connective.Tissue, label = T) +NoLegend()
```

```{r}
Connective.Tissue.Markers <- FindAllMarkers(Connective.Tissue, only.pos = T)

top10 <- Connective.Tissue.Markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
DoHeatmap(Connective.Tissue, features = top10$gene) + NoLegend()
```


```{r}
idents.list = as.character(0:9)
Connective.Tissue.Subtypes = list("Fibroblasts","Fibroblasts","Fibroblasts","Fibroblasts","Fibroblasts","Endothelium","Smooth.Muscle","Fibroblasts","Pericytes","Lymphatic.Endothelium")

for (i in 1:length(idents.list)) {Connective.Tissue <- SetIdent(Connective.Tissue, cells = WhichCells(Connective.Tissue, idents = idents.list[[i]]), value = Connective.Tissue.Subtypes[[i]])}
Connective.Tissue[["Cell.Subtypes"]] <- Idents(object = Connective.Tissue)
DimPlot(Connective.Tissue, label = T, repel = T) + NoLegend()
```

```{r}
Connective.Tissue.List <- list()
for (i in 1:length(idents.list)) {Connective.Tissue.List[[i]] <-  WhichCells(Connective.Tissue, idents = Connective.Tissue.Subtypes[[i]])}
Idents(OAC1) <- OAC1[["Cell.Subtypes"]]
for (i in 1:length(idents.list)) {OAC1 <- SetIdent(OAC1, cells = Connective.Tissue.List[[i]], value = Connective.Tissue.Subtypes[[i]])}

DimPlot(OAC1, reduction = "umap", label = T, repel = T, split.by = "tumour.normal") + NoLegend()
```

```{r}
Fibroblasts <- SubsetData(Connective.Tissue, ident.use = c("Fibroblasts"))
```

```{r}
Fibroblasts <- FindVariableFeatures(Fibroblasts, assay = "SCT")

plot1 <- VariableFeaturePlot(Fibroblasts)
top10 <- head(VariableFeatures(Fibroblasts), 10)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
CombinePlots(plots = list(plot1, plot2), ncol = 1)
```

```{r}
Fibroblasts <- RunPCA(Fibroblasts, features = VariableFeatures(Fibroblasts))
```

```{r}
DimPlot(Fibroblasts, reduction = "pca")
```

```{r}
DimHeatmap(Fibroblasts, dims = 1, cells = 500, balanced = TRUE)
```

```{r}
DimHeatmap(Fibroblasts, dims = 1:9, cells = 500, balanced = TRUE)
```

```{r}
ElbowPlot(Fibroblasts)
```

```{r}
Fibroblasts <- FindNeighbors(Fibroblasts, dims = 1:15)
Fibroblasts <- FindClusters(Fibroblasts, resolution = 0.5:2.5)
```

```{r}
Idents(Fibroblasts) <- "SCT_snn_res.0.5"
Fibroblasts <- RunUMAP(Fibroblasts, dims = 1:15, n.neighbors = 50, n.epochs = 1000, spread = 0.3, min_dist=0.001, seed.use = 111)
Fibroblasts <- RunTSNE(Fibroblasts, dims = 1:15, perplexity = 128, max_iter = 2000, seed.use = 111)
DimPlot(Fibroblasts, reduction = "tsne")
```

```{r}
DimPlot(Fibroblasts, reduction = "umap")
```

```{r}
Fibroblasts.Markers <- FindAllMarkers(Fibroblasts, only.pos = T)
Fibroblasts.Markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_logFC)
```

```{r}
top10 <- Fibroblasts.Markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
DoHeatmap(Fibroblasts, features = top10$gene) + NoLegend()
```

```{r}
idents.list = as.character(0:7)
Fibroblasts.Subtypes = list("Prostaglandin.NOFs", "Quiescent.NOFs", "Thrombospondin.CAFs", "MFAP5.NOFs", "Collagen.CAFs", "MFAP5.NOFs", "Myofibroblast.CAFs", "MFAP5.Semaphorin.CAFs")

for (i in 1:length(idents.list)) {Fibroblasts <- SetIdent(Fibroblasts, cells = WhichCells(Fibroblasts, idents = idents.list[[i]]), value = Fibroblasts.Subtypes[[i]])}
Fibroblasts[["Cell.Subtypes"]] <- Idents(object = Fibroblasts)
DimPlot(Fibroblasts, label = T, repel = T) + NoLegend()
```

```{r}
Fibroblasts.List <- list()
for (i in 1:length(idents.list)) {Fibroblasts.List[[i]] <-  WhichCells(Fibroblasts.Complete, idents = Fibroblasts.Subtypes[[i]])}
Idents(OAC1) <- OAC1[["Cell.Subtypes"]]
for (i in 1:length(idents.list)) {OAC1 <- SetIdent(OAC1, cells = Fibroblasts.List[[i]], value = Fibroblasts.Subtypes[[i]])}

DimPlot(OAC1, reduction = "umap", label = T, repel = T, split.by = "tumour.normal") + NoLegend()
```

```{r}
OAC1[["Cell.Subtypes"]] <- Idents(object = OAC1)
```

```{r}
Fibroblasts.Complete <- Fibroblasts
Fibroblasts <- subset(Fibroblasts, idents = "Quiescent.NOFs", invert=T)
DimPlot(Fibroblasts,group.by = "Cell.Subtypes", pt.size = 2, label = T) +NoLegend() +theme(axis.title.y = element_blank()) +labs(x = "UMAP")
```

Now separated the Fibroblasts from cancer only, there is clearly blurring between the NOF and CAFs and a spectrum rather than discrete clusters 
```{r}
Idents(Fibroblasts) <- "tumour.normal"
CAFs <- subset(Fibroblasts, idents = "Tumour")
NOFs <- subset(Fibroblasts, idents = "Normal")
```


```{r}
CAFs <- FindVariableFeatures(CAFs, assay = "SCT")
plot1 <- VariableFeaturePlot(CAFs)
top10 <- head(VariableFeatures(CAFs), 10)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
CombinePlots(plots = list(plot1, plot2))
```

```{r}
CAFs <- RunPCA(CAFs, features = VariableFeatures(CAFs))
```

```{r}
DimPlot(CAFs, reduction = "pca")
```

```{r}
DimHeatmap(CAFs, dims = 1, cells = 500, balanced = TRUE)
```

```{r}
DimHeatmap(CAFs, dims = 1:9, cells = 500, balanced = TRUE)
```

```{r}
ElbowPlot(CAFs)
```

```{r}
CAFs <- FindNeighbors(CAFs, dims = 1:10)
CAFs <- FindClusters(CAFs, resolution = 0.5:2.5)
```

```{r}
Idents(CAFs) <- "SCT_snn_res.0.5"
CAFs <- RunUMAP(CAFs, dims = 1:10, seed.use = 111)
DimPlot(CAFs, label = T) +NoLegend()
```

```{r}
CAFs.Markers <- FindAllMarkers(CAFs, only.pos = T)
CAFs.Markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_logFC)
```


```{r}
top10 <- CAFs.Markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
DoHeatmap(CAFs, features = top10$gene) + NoLegend()
```


```{r}
idents.list = as.character(0:6)
CAFs.Subtypes = list("Thrombospondin.CAFs","Collagen.CAFs","Collagen.CAFs","Inflammatory.CAFS","Myofibroblast.CAFs","MFAP5.Semaphorin.CAFs","Thrombospondin.CAFs")

for (i in 1:length(idents.list)) {CAFs <- SetIdent(CAFs, cells = WhichCells(CAFs, idents = idents.list[[i]]), value = CAFs.Subtypes[[i]])}
CAFs[["Cell.Subtypes"]] <- Idents(object = CAFs)
DimPlot(CAFs, label = T, repel = T) + NoLegend()
```

```{r}
CAFs.List <- list()
for (i in 1:length(idents.list)) {CAFs.List[[i]] <-  WhichCells(CAFs, idents = CAFs.Subtypes[[i]])}
Idents(OAC1) <- OAC1[["Cell.Subtypes"]]
for (i in 1:length(idents.list)) {OAC1 <- SetIdent(OAC1, cells = CAFs.List[[i]], value = CAFs.Subtypes[[i]])}

DimPlot(OAC1, reduction = "umap", label = T, repel = T, split.by = "tumour.normal") + NoLegend()
```

Now the Endothelial cells
```{r}
Endothelium <- SubsetData(Connective.Tissue, ident.use = c("Endothelium", "Pericytes", "Lymphatic.Endothelium", "Smooth.Muscle"))
Endothelium <- FindVariableFeatures(Endothelium, assay = "SCT")

plot1 <- VariableFeaturePlot(Endothelium)
top10 <- head(VariableFeatures(Endothelium), 10)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
CombinePlots(plots = list(plot1, plot2), ncol = 1)
```

```{r}
Endothelium <- RunPCA(Endothelium, features = VariableFeatures(Endothelium))
```

```{r}
DimPlot(Endothelium, reduction = "pca")
```

```{r}
DimHeatmap(Endothelium, dims = 1, cells = 500, balanced = TRUE)
```

```{r}
DimHeatmap(Endothelium, dims = 1:9, cells = 500, balanced = TRUE)
```


```{r}
ElbowPlot(Endothelium)
```

```{r}
Endothelium <- FindNeighbors(Endothelium, dims = 1:10)
Endothelium <- FindClusters(Endothelium, resolution = 0.5:2.5)
```

```{r}
Idents(Endothelium) <- "SCT_snn_res.0.5"
Endothelium <- RunUMAP(Endothelium, dims = 1:19, n.neighbors = 50, n.epochs = 1000, spread = 0.3, min_dist=0.001, seed.use = 111)

DimPlot(Endothelium, reduction = "umap", label = T)
```

```{r}
Endothelium.Markers <- FindAllMarkers(Endothelium, only.pos = T)
Endothelium.Markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_logFC)
```

```{r}
top10 <- Endothelium.Markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
DoHeatmap(Endothelium, features = top10$gene) + NoLegend()
```

```{r}
idents.list = as.character(0:7)
Endothelium.Subtypes = list("Normal.Endothelium", "Smooth.Muscle", "Pericytes", "Inflammatory.Response.Endothelium ", "COL4.Tumour.Endothelium", "Lymphatic.Endothelium", "Endothelin.Tumour.Endothelium", "Tumour.Muscle")


for (i in 1:length(idents.list)) {Endothelium <- SetIdent(Endothelium, cells = WhichCells(Endothelium, idents = idents.list[[i]]), value = Endothelium.Subtypes[[i]])}
Endothelium[["Cell.Subtypes"]] <- Idents(object = Endothelium)
DimPlot(Endothelium, label = T, repel = T) + NoLegend()
```

```{r}
Endothelium.List <- list()
for (i in 1:length(idents.list)) {Endothelium.List[[i]] <-  WhichCells(Endothelium, idents = Endothelium.Subtypes[[i]])}
Idents(OAC1) <- OAC1[["Cell.Subtypes"]]
for (i in 1:length(idents.list)) {OAC1 <- SetIdent(OAC1, cells = Endothelium.List[[i]], value = Endothelium.Subtypes[[i]])}

DimPlot(OAC1, reduction = "umap", label = T, repel = T) + NoLegend()
```

```{r}
OAC1[["Cell.Subtypes"]] <- Idents(object = OAC1)
```


```{r}
B.Lymphocytes <- SubsetData(OAC1, ident.use = c("Plasma.Cells","B.Cells"))
B.Lymphocytes <- FindVariableFeatures(B.Lymphocytes, assay = "SCT")

plot1 <- VariableFeaturePlot(B.Lymphocytes)
top10 <- head(VariableFeatures(B.Lymphocytes), 10)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
CombinePlots(plots = list(plot1, plot2), ncol = 1)
```

```{r}
B.Lymphocytes <- RunPCA(B.Lymphocytes, features = VariableFeatures(B.Lymphocytes))
```

```{r}
ElbowPlot(B.Lymphocytes,ndims = 50)
B.Lymphocytes.mod <- FindNeighbors(B.Lymphocytes, dims = 1:32,k.param = 15)
B.Lymphocytes.mod <- FindClusters(B.Lymphocytes.mod, resolution = seq(0.2,2,0.1))

B.Lymphocytes.mod <- RunUMAP(B.Lymphocytes.mod, dims = 1:32, n.neighbors = 350, n.epochs =NULL, spread = 5, min_dist=0.5, seed.use = 42)
Idents(B.Lymphocytes.mod) <- "SCT_snn_res.1.5"
DimPlot(B.Lymphocytes.mod,label = T,pt.size = 1.25)
DimPlot(B.Lymphocytes.mod,group.by = "orig.ident") #orig.patient seq.batch Dying
DimPlot(B.Lymphocytes.mod,group.by = "Cell.Subtypes")
DimPlot(B.Lymphocytes.mod,group.by = "tumour.normal")
FeaturePlot(B.Lymphocytes.mod,features = c("HLA-DRA", "MS4A1", "GZMB"))
FeaturePlot(B.Lymphocytes.mod,features = c("AICDA", "MKI67", "LMO2","BCL2A1"))
FeaturePlot(B.Lymphocytes.mod,features = c("JCHAIN", "IGHA1", "IGHM","IGHG1", "IGLL5"))
FeaturePlot(B.Lymphocytes.mod,features = c("IGHG1","WT1-AS","IGHG4"))
FeaturePlot(B.Lymphocytes.mod,features = c("IGHD","IGHM"))
FeaturePlot(B.Lymphocytes.mod,features = c("percent.mito","nFeature_RNA","nCount_RNA"))
FeaturePlot(B.Lymphocytes.mod,features = c("KRT13","nFeature_RNA","nCount_RNA","percent.mito"),min.cutoff = "q10",max.cutoff = "q90")
VlnPlot(B.Lymphocytes.mod,features = c("percent.mito","nFeature_RNA","nCount_RNA"))

B.Lymphocytes.mod.cluster.averages <- AverageExpression(B.Lymphocytes.mod, return.seurat = TRUE)

DoHeatmap(B.Lymphocytes.mod.cluster.averages, features = c("MZB1", "CD5", "CD19", "CD79A","CD1D", "CD274", "STMN1", "AICDA", "MKI67", "BIRC5","LMO2", "BCL2A1","GZMB", "MS4A1", "HLA-DRA","JCHAIN", "IGHA1","IGHG1","IGHG4","IGHM","IGHD", "IRF4","PRDM1"), size = 5,draw.lines = F,) +scale_fill_gradientn(colors = rev(c("#67001f","#b2182b","#d6604d","#f4a582","#fddbc7","#d1e5f0","#92c5de","#4393c3","#2166ac","#053061"))) +
theme(text = element_text(size = 15,face="bold.italic"))

DoHeatmap(B.Lymphocytes.mod, features = c("CD5", "CD19", "CD79A","CD1D", "CD274", "STMN1", "AICDA", "MKI67", "BIRC5","LMO2", "BCL2A1","GZMB", "MS4A1", "HLA-DRA","JCHAIN", "IGHA1","IGHG1"), size = 5,draw.lines = F,) +scale_fill_gradientn(colors = rev(c("#67001f","#b2182b","#d6604d","#f4a582","#fddbc7","#d1e5f0","#92c5de","#4393c3","#2166ac","#053061"))) +
theme(text = element_text(size = 15,face="bold.italic"))

B.Lymphocytes.mod.markers <- FindAllMarkers(B.Lymphocytes.mod, only.pos = T)
top2 <- B.Lymphocytes.mod.markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_logFC)
top5 <- B.Lymphocytes.mod.markers %>% group_by(cluster) %>% top_n(n = 5, wt = avg_logFC)
top10 <- B.Lymphocytes.mod.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
write.csv(B.Lymphocytes.mod.markers,file = "B.lymphocyte.mod.markers.csv",quote = F)

DoHeatmap(B.Lymphocytes.mod, features = top10$gene,assay = "SCT",) + NoLegend() +
theme(text = element_text(size = 10,face="bold.italic"))

table(B.Lymphocytes.mod$SCT_snn_res.1.5)

idents.list <- as.character(0:18)
cell.subset.list<- list("MALT.B.IGHA1.JCHAIN.IGK","Follicular.B.MS4A1.HLADRA","MALT.B.IGHA1.JCHAIN.IGL","Plasma.B.IGHG1high.IGHG4high.IGL","Plasma.B.IGHG1high.IGHG4high.IGK","Follicular.B.MS4A1.HLADRA","Follicular.B.MS4A1.HLADRA","GC.B.LightZone.BCL2A1","remove.low.quality.capture","Plasma.B.IGHG2.IGK","GC.B.DarkZone.AICDA","Plasma.B.IGHG1low.IGHG4high.IGK","MALT.B.IGHA1.JCHAIN.IGLL5","Follicular.B.MS4A1.HLADRA","Plasma.B.IGHG1.IGK","MALT.B.IGHM.JCHAIN","Mucosal.B.IGHD","Plasma.B.IGHG1high.IGHG3high.IGK","MALT.B.IGHA1.JCHAIN.IGK")
for (i in 1:length(idents.list)) {B.Lymphocytes.mod <- SetIdent(B.Lymphocytes.mod, cells = WhichCells(B.Lymphocytes.mod, idents = idents.list[[i]]), value = cell.subset.list[[i]])}
B.Lymphocytes.mod[["new.Bcell.subsets"]] <- Idents(object = B.Lymphocytes.mod)

Idents(B.Lymphocytes.mod) <- "SCT_snn_res.1.5"
B.Lymphocytes.mod.clean <-subset(B.Lymphocytes.mod,idents = c(0,1,2,3,4,5,6,7,9,10,11,12,13,14,15,16,17,18))
B.Lymphocytes.mod.clean <- subset(B.Lymphocytes.mod.clean, subset = percent.mito < 15)

DimPlot(B.Lymphocytes.mod.clean,label = T,pt.size = 1.25)
DimPlot(B.Lymphocytes.mod.clean,label = T,pt.size = 1.25,group.by = "new.Bcell.subsets")

B.Lymphocytes.mod.clean.reclustered <- FindVariableFeatures(B.Lymphocytes.mod.clean, assay = "SCT")

B.Lymphocytes.mod.clean.reclustered <- RunPCA(B.Lymphocytes.mod.clean.reclustered, features = VariableFeatures(B.Lymphocytes.mod.clean.reclustered))

ElbowPlot(B.Lymphocytes.mod.clean.reclustered,ndims = 50)

B.Lymphocytes.mod.clean.reclustered <- FindNeighbors(B.Lymphocytes.mod.clean.reclustered, dims = 1:31,k.param = 15)
B.Lymphocytes.mod.clean.reclustered <- FindClusters(B.Lymphocytes.mod.clean.reclustered, resolution = seq(0.2,2,0.1))

B.Lymphocytes.mod.clean.reclustered <- RunUMAP(B.Lymphocytes.mod.clean.reclustered, dims = 1:31, n.neighbors = 350, n.epochs =NULL, spread = 5, min_dist=0.5, seed.use = 42)
Idents(B.Lymphocytes.mod.clean.reclustered) <- "SCT_snn_res.1.5"

#B lymphocyte figure panel A
Idents(B.Lymphocytes.mod.clean.reclustered) <- "new.Bcell.subsets"
DimPlot(B.Lymphocytes.mod.clean.reclustered,label = F,pt.size = 2)
DimPlot(B.Lymphocytes.mod.clean.reclustered,label = F,pt.size = 2,group.by = "tumour.normal")

FeaturePlot(B.Lymphocytes.mod.clean.reclustered,features = c("BCL2A1","LMO2", "MKI67","CXCR4"))

VlnPlot(B.Lymphocytes.mod.clean.reclustered,features = c("CD274","BIRC5","IL10"))
VlnPlot(B.Lymphocytes.mod.clean.reclustered,features = c("IGKC","IGLJ2"))


Idents(B.Lymphocytes.mod.clean.reclustered) <- "new.Bcell.subsets"
levels(B.Lymphocytes.mod.clean.reclustered)
idents.list <- as.character(levels(B.Lymphocytes.mod.clean.reclustered))
cell.subset.list<- list("MALT.B.IgA", "Plasma.IgG", "Mucosal.B.IGHD","MALT.B.IgM","Plasma.IgG","Follicular.B.cell", "MALT.B.IgA","Plasma.IgG","GC.B.cell","Plasma.IgG","Follicular.B.cell","Plasma.IgG","Plasma.IgG","MALT.B.IgA")
for (i in 1:length(idents.list)) {B.Lymphocytes.mod.clean.reclustered <- SetIdent(B.Lymphocytes.mod.clean.reclustered, cells = WhichCells(B.Lymphocytes.mod.clean.reclustered, idents = idents.list[[i]]), value = cell.subset.list[[i]])}
B.Lymphocytes.mod.clean.reclustered[["simple.Bcell.subsets"]] <- Idents(object = B.Lymphocytes.mod.clean.reclustered)

DimPlot(B.Lymphocytes.mod.clean.reclustered,label = F,pt.size = 2,group.by = "simple.Bcell.subsets", cols = c("#00BFC4", "#00B6EB", "#06A4FF", "#A58AFF", "#DF70F8","#FB61D7"))

DimPlot(B.Lymphocytes.mod.clean.reclustered,label = F,pt.size = 2,group.by = "tumour.normal")

B.Lymphocytes.mod.clean.reclustered.cluster.averages <- AverageExpression(B.Lymphocytes.mod.clean.reclustered, return.seurat = TRUE)


levels(B.Lymphocytes.mod.clean.reclustered.cluster.averages)
levels(B.Lymphocytes.mod.clean.reclustered.cluster.averages)<-c("Follicular.B.cell","GC.B.cell","Mucosal.B.IGHD","MALT.B.IgM","MALT.B.IgA", "Plasma.IgG")

DoHeatmap(B.Lymphocytes.mod.clean.reclustered.cluster.averages, features = c("CD19","MS4A1","HLA-DRA","CD79A","MME","STMN1", "AICDA", "MKI67", "BIRC5","LMO2","BCL2A1","CD27","CD38","MZB1","IGHD","IGHM","JCHAIN","IGHA1","PRDM1","IGHG1","IGHG2","IGHG3","IGHG4","IGLC1","IGKC"), size = 2.5,draw.lines = F,) +scale_fill_gradientn(colors = rev(c("#67001f","#b2182b","#d6604d","#f4a582","#fddbc7","#d1e5f0","#92c5de","#4393c3","#2166ac","#053061"))) +
theme(text = element_text(size = 15,face="bold.italic")) 

levels(B.Lymphocytes.mod.clean.reclustered.cluster.averages)
levels(B.Lymphocytes.mod.clean.reclustered.cluster.averages)<-c("Follicular.B.MS4A1.HLADRA","GC.B.DarkZone.AICDA","GC.B.LightZone.BCL2A1", "Mucosal.B.IGHD","MALT.B.IGHA1.JCHAIN.IGL","MALT.B.IGHA1.JCHAIN.IGK", "MALT.B.IGHA1.JCHAIN.IGLL5","MALT.B.IGHM.JCHAIN", "Plasma.B.IGHG1.IGK", "Plasma.B.IGHG2.IGK", "Plasma.B.IGHG1high.IGHG3high.IGK","Plasma.B.IGHG1high.IGHG4high.IGK","Plasma.B.IGHG1low.IGHG4high.IGK","Plasma.B.IGHG1high.IGHG4high.IGL")
levels(B.Lymphocytes.mod.clean.reclustered.cluster.averages)

DoHeatmap(B.Lymphocytes.mod.clean.reclustered.cluster.averages, features = c("CD5","MME","CD19","MS4A1","HLA-DRA","CD79A","STMN1", "AICDA", "MKI67", "BIRC5","LMO2","BCL2A1","CD27","CD38","MZB1",,"JCHAIN","IGHA1","IGHD","IGHM","IGLL5","IGKC","IGHG1","IGHG2","IGHG3","IGHG4","CD274","IL10","GZMB"), size = 2.5,draw.lines = F,) +scale_fill_gradientn(colors = rev(c("#67001f","#b2182b","#d6604d","#f4a582","#fddbc7","#d1e5f0","#92c5de","#4393c3","#2166ac","#053061"))) +
theme(text = element_text(size = 15,face="bold.italic")) 

table<- as.data.frame(table(B.Lymphocytes.mod.clean.reclustered@meta.data$simple.Bcell.subsets,B.Lymphocytes.mod.clean.reclustered@meta.data$orig.patient,B.Lymphocytes.mod.clean.reclustered$T.stage,B.Lymphocytes.mod.clean.reclustered$tumour.normal))

#order patients by pT Stage
table$Var2 <- factor(table$Var2, levels=c("OAC2811","OAC3010","OAC22","OAC1210","OAC1212","OAC1411","OAC1512","OAC1710","OAC233","OAC276","OAC44","OAC46","OAC55","OAC67","OAC174","OAC307","OAC132","OAC166","OAC2610","OAC2210","OAC263"))

library(scales)
hue_pal()(14) # c("#F8766D", "#C49A00", "#53B400", "#00C094", "#00B6EB", "#A58AFF", "#FB61D7")
#Myeloid figure panel C
ggplot(table, aes(x = Var3, y = Freq, fill = Var1)) +
    geom_bar(position = "stack",stat = 'identity') +
    xlab("pT Stage") +
    ylab("Count of Myeloid cells") +
    theme(legend.title=element_blank()) + 
    scale_fill_manual(values=c("#00BFC4", "#00B6EB", "#06A4FF", "#A58AFF", "#DF70F8","#FB61D7")) + 
  facet_grid(Var4 ~ .)+ 
    theme(axis.text.x = element_text(angle = 90))+
    theme(panel.background = element_blank())+ 
    theme(axis.line = element_line(colour = "black"))

ggplot(table, aes(x = Var3, y = Freq, fill = Var1)) +
    geom_bar(position = "fill",stat = 'identity') +
    xlab("pT Stage") +
    ylab("Proportion of Myeloid cells") +
    theme(legend.title=element_blank()) + 
    scale_fill_manual(values=c("#00BFC4", "#00B6EB", "#06A4FF", "#A58AFF", "#DF70F8","#FB61D7")) + 
  facet_grid(Var4 ~ .)+ 
    theme(axis.text.x = element_text(angle = 90))+
    theme(panel.background = element_blank())+ 
    theme(axis.line = element_line(colour = "black"))

ggplot(table, aes(x = Var2, y = Freq, fill = Var1)) +
    geom_bar(position = "stack",stat = 'identity') +
    xlab("patient") +
    ylab("Count of Myeloid cells") +
    theme(legend.title=element_blank()) + 
    scale_fill_manual(values=c("#00BFC4", "#00B6EB", "#06A4FF", "#A58AFF", "#DF70F8","#FB61D7")) + 
    facet_grid(Var4 ~ .) + 
    theme(axis.text.x = element_text(angle = 90))+
    theme(panel.background = element_blank())+ 
    theme(axis.line = element_line(colour = "black"))

ggplot(table, aes(x = Var2, y = Freq, fill = Var1)) +
    geom_bar(position = "fill",stat = 'identity') +
    xlab("patient") +
    ylab("Proportion of Myeloid cells") +
    theme(legend.title=element_blank()) + 
    scale_fill_manual(values=c("#00BFC4", "#00B6EB", "#06A4FF", "#A58AFF", "#DF70F8","#FB61D7")) + 
  facet_grid(Var4 ~ .)+ 
    theme(axis.text.x = element_text(angle = 90))+
    theme(panel.background = element_blank())+ 
    theme(axis.line = element_line(colour = "black"))
```

chi-squared tests
```{r}
d<- table(B.Lymphocytes.mod.clean.reclustered@meta.data$simple.Bcell.subsets,B.Lymphocytes.mod.clean.reclustered$tumour.normal)
chisq.Tcells.tumour.normal <-apply(d, 1, chisq.test)

library("gplots")
balloonplot(t(d), main ="T cells", xlab ="test", ylab="",
            label = FALSE, show.margins = FALSE)
library("graphics")
mosaicplot(d, shade = TRUE, las=2,
           main = "T cells")

d<- table(B.Lymphocytes.mod.clean.reclustered@meta.data$simple.Bcell.subsets,B.Lymphocytes.mod.clean.reclustered$orig.patient)
chisq <-chisq.test(d)
f <- as.data.frame((chisq$observed/chisq$expected))
boxplot(f$Freq ~ f$Var1)
```
plot out pearsons residuals
```{r}
chisq <-chisq.test(d)
chisq$observed
install.packages("corrplot")
library(corrplot)
col2 <- colorRampPalette(c("#003c30", "#01665e" ,"#35978f" ,"#80cdc1","#c7eae5" ,"white", "#f6e8c3" ,"#dfc27d", "#bf812d", "#8c510a", "#543005"))
 corrplot(chisq$residuals, is.cor = FALSE,tl.col = "black",cl.align.text="l",cl.ratio=0.9,cl.cex=1,method = "circle",col = col2(100),outline=T,bg="#f0f0f0")
```
as percentages
```{r}

contrib <- 100*chisq$residuals^2/chisq$statistic
round(contrib, 3)
corrplot(contrib, is.cor = FALSE,tl.col = "black",cl.align.text="l",cl.ratio=0.9,cl.cex=1)
chisq$p.value
```

```{r}
B.Lymphocytes.List <- list()
for (i in 1:length(idents.list)) {B.Lymphocytes.List[[i]] <-  WhichCells(B.Lymphocytes, idents = B.Lymphocyte.Subtypes[[i]])}
Idents(OAC1) <- OAC1[["Cell.Subtypes"]]
for (i in 1:length(idents.list)) {OAC1 <- SetIdent(OAC1, cells = B.Lymphocytes.List[[i]], value = B.Lymphocyte.Subtypes[[i]])}

DimPlot(OAC1, reduction = "umap", label = T, repel = T, split.by = "tumour.normal") + NoLegend()
```

```{r}
OAC1[["Cell.Subtypes"]] <- Idents(object = OAC1)
```
 

```{r, fig.height=8, fig.width=8}
Idents(OAC1)<- OAC1[["Subsets"]]
Antigen.Presenting <- SubsetData(OAC1, ident.use = c("Macrophages"))
Antigen.Presenting <- FindVariableFeatures(Antigen.Presenting, assay = "SCT")

plot1 <- VariableFeaturePlot(Antigen.Presenting)
top10 <- head(VariableFeatures(Antigen.Presenting), 10)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
CombinePlots(plots = list(plot1, plot2), ncol = 1)
```

```{r}
Antigen.Presenting <- RunPCA(Antigen.Presenting, features = VariableFeatures(Antigen.Presenting))
```

```{r}
DimPlot(Antigen.Presenting, reduction = "pca")
```

```{r}
DimHeatmap(Antigen.Presenting, dims = 1, cells = 500, balanced = TRUE)
```

```{r}
DimHeatmap(Antigen.Presenting, dims = 1:9, cells = 500, balanced = TRUE)
```

```{r}
ElbowPlot(Antigen.Presenting)
```

```{r}
ElbowPlot(Antigen.Presenting, ndims = 50) #select 20 PCAs for analysis

Antigen.Presenting.mod <- FindNeighbors(Antigen.Presenting, dims = 1:20,k.param = 15)
Antigen.Presenting.mod <- FindClusters(Antigen.Presenting.mod, resolution = seq(0.2,2,0.1))

Antigen.Presenting.mod <- RunUMAP(Antigen.Presenting.mod, dims = 1:20, n.neighbors = 203, n.epochs =NULL, spread = 5, min_dist=0.5, seed.use = 42)
Idents(Antigen.Presenting.mod) <- "SCT_snn_res.1.4" #0.8 res seems to work
DimPlot(Antigen.Presenting.mod,label = T,pt.size = 1.25)
DimPlot(Antigen.Presenting.mod,group.by = "orig.ident") #orig.patient seq.batch Dying
DimPlot(Antigen.Presenting.mod,group.by = "Cell.Subtypes")
DimPlot(Antigen.Presenting.mod,group.by = "tumour.normal")

Antigen.Presenting.mod.cluster.averages <- AverageExpression(Antigen.Presenting.mod, return.seurat = TRUE)

DoHeatmap(Antigen.Presenting.mod.cluster.averages, features = c("CTSS","FCN1","S1008A","S100A9", "LYZ","VCAN", "LGMN","CTSB","CD14","CD68","CD163","MRC1","MSR1", "FCGR3A","MAFB","MAF", "CX3CR1","ITGAM","CSF1R","IL1B","CXCL8","APOE","CD163","SEPP1","C1QB","C1QA","C1QC","STMN1","MKI67","TOP2A","CLEC10A","CD1C","CLEC4C","PTCRA","CCL7","LAMP3","CLEC9A"), size = 0, 
    draw.lines = F) +scale_fill_gradientn(colors = rev(c("#67001f","#b2182b","#d6604d","#f4a582","#fddbc7","#d1e5f0","#92c5de","#4393c3","#2166ac","#053061"))) +
theme(text = element_text(size = 20,face="bold.italic"))

DoHeatmap(Antigen.Presenting.mod.cluster.averages, features = c("IL12A","IL1B","TNF","IL6","IL13","IL10","IL1RN","TGFB1","CCL17","CCL18","CCL22","PTGES2"), size = 0, 
    draw.lines = F) +scale_fill_gradientn(colors = rev(c("#67001f","#b2182b","#d6604d","#f4a582","#fddbc7","#d1e5f0","#92c5de","#4393c3","#2166ac","#053061"))) +
theme(text = element_text(size = 20,face="bold.italic"))

Antigen.Presenting.mod.markers <- FindAllMarkers(Antigen.Presenting.mod, only.pos = T)
top2 <- Antigen.Presenting.mod.markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_logFC)
top5 <- Antigen.Presenting.mod.markers %>% group_by(cluster) %>% top_n(n = 5, wt = avg_logFC)
top10 <- Antigen.Presenting.mod.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
top20 <- Antigen.Presenting.mod.markers %>% group_by(cluster) %>% top_n(n = 20, wt = avg_logFC)
write.csv(Antigen.Presenting.mod.markers,file = "Antigen.Presenting.mod.markers.csv",quote = F)

DoHeatmap(Antigen.Presenting.mod, features = top20$gene,assay = "SCT",) + NoLegend() +
theme(text = element_text(size = 7,face="bold.italic"))

FeaturePlot(Antigen.Presenting.mod,features = c("nFeature_RNA","nCount_RNA","percent.mito"),min.cutoff = "q10",max.cutoff = "q90")
VlnPlot(Antigen.Presenting.mod,features = c("percent.mito","nFeature_RNA","nCount_RNA"))
VlnPlot(Antigen.Presenting.mod,features = c("CLEC9A","FCER1A","VCAN","FCGR3A","AXL","ITM2C"))
VlnPlot(Antigen.Presenting.mod,features = c("CD163","CCL3","CD68","MRC1","CD14","CD1D"))
VlnPlot(Antigen.Presenting.mod,features = c("CD14","FCGR3A"))
VlnPlot(Antigen.Presenting.mod,features = c("CCL2","CCL5","IL10"))
DotPlot(Antigen.Presenting.mod,features = c("CD68","CD163","MSR1"))
VlnPlot(Antigen.Presenting.mod,features = c("CD40", "CD68","CD163","MSR1","MRC1","CCL18","CCL20","EREG","GPNMB"))
VlnPlot(Antigen.Presenting.mod,features = c("CD8A","CD1C"))

FeaturePlot(Antigen.Presenting.mod,features = c("CD68","CD163","MRC1","CCL7"),min.cutoff = "q10",max.cutoff = "q90")
FeaturePlot(Antigen.Presenting.mod,features = c("CD14","FCGR3A","CD68","CD163","MRC1"),min.cutoff = "q10",max.cutoff = "q90")

idents.list <- as.character(0:10)
cell.subset.list<- list("mo-macs","M2like.TAMs","M2like.TAMs","M1like.TAMs","DC.CD1C","DC.CD1C","DC.CLEC9A","pDC.LAMP3","monocytes","remove","DC.CD1C")
for (i in 1:length(idents.list)) {Antigen.Presenting.mod <- SetIdent(Antigen.Presenting.mod, cells = WhichCells(Antigen.Presenting.mod, idents = idents.list[[i]]), value = cell.subset.list[[i]])}
Antigen.Presenting.mod[["new.Antigen.Presenting.subsets"]] <- Idents(object = Antigen.Presenting.mod)

DimPlot(Antigen.Presenting.mod,label = T,pt.size = 1.25,group.by = "new.Antigen.Presenting.subsets")
Idents(Antigen.Presenting.mod) <- "new.Antigen.Presenting.subsets"
Idents(Antigen.Presenting.mod) <- "SCT_snn_res.1.4"
Antigen.Presenting.mod.clean <-subset(Antigen.Presenting.mod,idents = c(0,1,2,3,4,5,6,7,8,10))
DimPlot(Antigen.Presenting.mod.clean,group.by = "new.Antigen.Presenting.subsets")


Antigen.Presenting.mod.clean.reclustered <- FindVariableFeatures(Antigen.Presenting.mod.clean, assay = "SCT")

Antigen.Presenting.mod.clean.reclustered <- RunPCA(Antigen.Presenting.mod.clean.reclustered, features = VariableFeatures(Antigen.Presenting.mod.clean.reclustered))

ElbowPlot(Antigen.Presenting.mod.clean.reclustered, ndims = 50) #select 20 PCAs for analysis

Antigen.Presenting.mod.clean.reclustered <- FindNeighbors(Antigen.Presenting.mod.clean.reclustered, dims = 1:20,k.param = 15)
Antigen.Presenting.mod.clean.reclustered <- FindClusters(Antigen.Presenting.mod.clean.reclustered, resolution = seq(0.2,2,0.1))

Antigen.Presenting.mod.clean.reclustered <- RunUMAP(Antigen.Presenting.mod.clean.reclustered, dims = 1:20, n.neighbors = 195, n.epochs =NULL, spread = 5, min_dist=0.5, seed.use = 42)
#Myeloid figure panel A
DimPlot(Antigen.Presenting.mod.clean.reclustered,group.by = "new.Antigen.Presenting.subsets",pt.size = 2)
DimPlot(Antigen.Presenting.mod.clean.reclustered,group.by = "tumour.normal",pt.size = 2)

Idents(Antigen.Presenting.mod.clean.reclustered) <- "SCT_snn_res.1.2" #0.8 res seems to work
DimPlot(Antigen.Presenting.mod.clean.reclustered,label = T,pt.size = 1.25)
DimPlot(Antigen.Presenting.mod.clean.reclustered,group.by = "orig.ident") #orig.patient seq.batch Dying
DimPlot(Antigen.Presenting.mod.clean.reclustered,group.by = "Cell.Subtypes")
DimPlot(Antigen.Presenting.mod.clean.reclustered,group.by = "tumour.normal")
DimPlot(Antigen.Presenting.mod.clean.reclustered,group.by = "new.Antigen.Presenting.subsets")

Idents(Antigen.Presenting.mod.clean.reclustered) <- "new.Antigen.Presenting.subsets"
levels(Antigen.Presenting.mod.clean.reclustered)
levels(Antigen.Presenting.mod.clean.reclustered)<-c("monocytes","mo-macs","M1like.TAMs","M2like.TAMs","DC.CLEC9A","DC.CD1C","pDC.LAMP3")
Antigen.Presenting.mod.clean.reclustered.markers <- FindAllMarkers(Antigen.Presenting.mod.clean.reclustered, only.pos = T)
top2 <- Antigen.Presenting.mod.clean.reclustered.markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_logFC)
top5 <- Antigen.Presenting.mod.clean.reclustered.markers %>% group_by(cluster) %>% top_n(n = 5, wt = avg_logFC)
top10 <- Antigen.Presenting.mod.clean.reclustered.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
DoHeatmap(Antigen.Presenting.mod.clean.reclustered, features = top10$gene,assay = "SCT") + NoLegend() +
theme(text = element_text(size = 10,face="bold.italic"))

write.csv(Antigen.Presenting.mod.clean.reclustered,file = "Antigen.Presenting.mod.clean.reclustered.csv",quote = F)


Idents(Antigen.Presenting.mod.clean.reclustered) <- "new.Antigen.Presenting.subsets"
Antigen.Presenting.mod.clean.reclustered.cluster.averages <- AverageExpression(Antigen.Presenting.mod.clean.reclustered, return.seurat = TRUE)

levels(Antigen.Presenting.mod.clean.reclustered.cluster.averages)
levels(Antigen.Presenting.mod.clean.reclustered.cluster.averages)<-c("monocytes","mo-macs","M1like.TAMs","M2like.TAMs","DC.CLEC9A","DC.CD1C","pDC.LAMP3")
#Myeloid figure panel B
DoHeatmap(Antigen.Presenting.mod.clean.reclustered.cluster.averages, features = c("CTSS","FCN1","LYZ","S1008A","S100A9","VCAN", "CD14","FCGR3A","CD68","CD80","CD163","MRC1","MSR1","LGMN", "CCL3","CCL4","CXCL8","IL1B","TNF","APOC1","APOE","CCL18","GPNMB","MMP12", "SPP1","STMN1","MKI67","TOP2A","CST3","CLEC9A","XCR1", "CD1C","FCER1A","IL1R2","LAMP3", "NUB1","CLEC4C"), size = 0, 
    draw.lines = F) +scale_fill_gradientn(colors = rev(c("#67001f","#b2182b","#d6604d","#f4a582","#fddbc7","#d1e5f0","#92c5de","#4393c3","#2166ac","#053061"))) +
theme(text = element_text(size = 20,face="bold.italic"))

DoHeatmap(Antigen.Presenting.mod.clean.reclustered.cluster.averages, features = c("IL12A","IL1B","TNF","IL6","IL13","IL10","IL1RN","TGFB1","CCL17","CCL18","CCL22","PTGES2"), size = 0, 
    draw.lines = F) +scale_fill_gradientn(colors = rev(c("#67001f","#b2182b","#d6604d","#f4a582","#fddbc7","#d1e5f0","#92c5de","#4393c3","#2166ac","#053061"))) +
theme(text = element_text(size = 20,face="bold.italic"))

table<- as.data.frame(table(Antigen.Presenting.mod.clean.reclustered@meta.data$new.Antigen.Presenting.subsets,Antigen.Presenting.mod.clean.reclustered@meta.data$orig.patient,Antigen.Presenting.mod.clean.reclustered$pT.Stage,Antigen.Presenting.mod.clean.reclustered$tumour.normal))

#order patients by pT Stage
table$Var2 <- factor(table$Var2, levels=c("OAC2811","OAC3010","OAC22","OAC1210","OAC1212","OAC1411","OAC1512","OAC1710","OAC233","OAC276","OAC44","OAC46","OAC55","OAC67","OAC174","OAC307","OAC132","OAC166","OAC2610","OAC2210","OAC263"))

library(scales)
hue_pal()(7) # c("#F8766D", "#C49A00", "#53B400", "#00C094", "#00B6EB", "#A58AFF", "#FB61D7")
#Myeloid figure panel C
ggplot(table, aes(x = Var3, y = Freq, fill = Var1)) +
    geom_bar(position = "stack",stat = 'identity') +
    xlab("pT Stage") +
    ylab("Count of Myeloid cells") +
    theme(legend.title=element_blank()) + 
    scale_fill_manual(values=c("#F8766D","#F8766D", "#C49A00", "#53B400", "#00C094", "#00B6EB", "#A58AFF", "#FB61D7")) + 
  facet_grid(Var4 ~ .)+ 
    theme(axis.text.x = element_text(angle = 90))+
    theme(panel.background = element_blank())+ 
    theme(axis.line = element_line(colour = "black"))

ggplot(table, aes(x = Var3, y = Freq, fill = Var1)) +
    geom_bar(position = "fill",stat = 'identity') +
    xlab("pT Stage") +
    ylab("Proportion of Myeloid cells") +
    theme(legend.title=element_blank()) + 
    scale_fill_manual(values=c("#F8766D","#F8766D", "#C49A00", "#53B400", "#00C094", "#00B6EB", "#A58AFF", "#FB61D7")) + 
  facet_grid(Var4 ~ .)+ 
    theme(axis.text.x = element_text(angle = 90))+
    theme(panel.background = element_blank())+ 
    theme(axis.line = element_line(colour = "black"))

ggplot(table, aes(x = Var2, y = Freq, fill = Var1)) +
    geom_bar(position = "stack",stat = 'identity') +
    xlab("patient") +
    ylab("Count of Myeloid cells") +
    theme(legend.title=element_blank()) + 
    scale_fill_manual(values=c("#F8766D","#F8766D", "#C49A00", "#53B400", "#00C094", "#00B6EB", "#A58AFF", "#FB61D7")) + 
    facet_grid(Var4 ~ .) + 
    theme(axis.text.x = element_text(angle = 90))+
    theme(panel.background = element_blank())+ 
    theme(axis.line = element_line(colour = "black"))

ggplot(table, aes(x = Var2, y = Freq, fill = Var1)) +
    geom_bar(position = "fill",stat = 'identity') +
    xlab("patient") +
    ylab("Proportion of Myeloid cells") +
    theme(legend.title=element_blank()) + 
    scale_fill_manual(values=c("#F8766D","#F8766D", "#C49A00", "#53B400", "#00C094", "#00B6EB", "#A58AFF", "#FB61D7")) + 
  facet_grid(Var4 ~ .)+ 
    theme(axis.text.x = element_text(angle = 90))+
    theme(panel.background = element_blank())+ 
    theme(axis.line = element_line(colour = "black"))
```


```{r}
#chi-squared tests
d <- table(Antigen.Presenting.mod.clean.reclustered@meta.data$new.Antigen.Presenting.subsets,Antigen.Presenting.mod.clean.reclustered$tumour.normal)
d <- d[-2,] #exclude row name "remove"

chisq.Tcells.tumour.normal <-apply(d, 1, chisq.test)
library("gplots")
balloonplot(t(d), main ="T cells", xlab ="test", ylab="",
            label = FALSE, show.margins = FALSE)
library("graphics")
mosaicplot(d, shade = TRUE, las=2,
           main = "T cells")

#plot out pearsons residuals
chisq <-chisq.test(d)
chisq$observed
install.packages("corrplot")
library(corrplot)
col2 <- colorRampPalette(c("#003c30", "#01665e" ,"#35978f" ,"#80cdc1","#c7eae5" ,"white", "#f6e8c3" ,"#dfc27d", "#bf812d", "#8c510a", "#543005"))
 corrplot(chisq$residuals, is.cor = FALSE,tl.col = "black",cl.align.text="l",cl.ratio=0.9,cl.cex=1,method = "circle",col = col2(100),outline=T,bg="#f0f0f0")

#as percentages
contrib <- 100*chisq$residuals^2/chisq$statistic
round(contrib, 3)
corrplot(contrib, is.cor = FALSE,tl.col = "black",cl.align.text="l",cl.ratio=0.9,cl.cex=1)
chisq$p.value

#Tumour only cells
Idents(Antigen.Presenting.mod.clean.reclustered) <- "T.stage"
levels(Antigen.Presenting.mod.clean.reclustered)
idents.list = as.character(0:4)
pT.Stage = list("0-2","0-2","0-2","3-4","3-4")
for (i in 1:length(idents.list)) {Antigen.Presenting.mod.clean.reclustered <- SetIdent(Antigen.Presenting.mod.clean.reclustered, cells = WhichCells(Antigen.Presenting.mod.clean.reclustered, idents = idents.list[[i]]), value = pT.Stage[[i]])}
Antigen.Presenting.mod.clean.reclustered[["pT.Stage"]] <- Idents(object = Antigen.Presenting.mod.clean.reclustered)
DimPlot(Antigen.Presenting.mod.clean.reclustered, label = T, repel = T) + NoLegend()

Idents(Antigen.Presenting.mod.clean.reclustered) <- "tumour.normal" 
Antigen.Presenting.mod.clean.reclustered.tumour <- subset(Antigen.Presenting.mod.clean.reclustered,idents = "Tumour")
d<- table(Antigen.Presenting.mod.clean.reclustered.tumour@meta.data$new.Antigen.Presenting.subsets,Antigen.Presenting.mod.clean.reclustered.tumour@meta.data$pT.Stage)
d<- d[-2,] #exclude row name "remove"
chisq.Tcells.tumour <-apply(d, 1, chisq.test)
library("gplots")
balloonplot(t(d), main ="Myeloid cells", xlab ="test", ylab="",
            label = FALSE, show.margins = FALSE)
library("graphics")
mosaicplot(d, shade = TRUE, las=2,
           main = "Myeloid cells")

#plot out pearsons residuals
chisq <-chisq.test(d)
chisq$observed
install.packages("corrplot")
library(corrplot)
col2 <- colorRampPalette(c("#003c30", "#01665e" ,"#35978f" ,"#80cdc1","#c7eae5" ,"white", "#f6e8c3" ,"#dfc27d", "#bf812d", "#8c510a", "#543005"))
 corrplot(chisq$residuals, is.cor = FALSE,tl.col = "black",cl.align.text="l",cl.ratio=0.9,cl.cex=1,method = "circle",col = col2(100),outline=T,bg="#f0f0f0")

#as percentages
contrib <- 100*chisq$residuals^2/chisq$statistic
round(contrib, 3)
corrplot(contrib, is.cor = FALSE,tl.col = "black",cl.align.text="l",cl.ratio=0.9,cl.cex=1)
chisq$p.value


##############
#mats analysis -  ends here
##############

```


```{r}
```{r}Idents(Antigen.Presenting) <- "SCT_snn_res.0.5"
idents.list = as.character(0:5)
Antigen.Presenting.Subtypes = list("TAMs.M2.Macrophages","TAMs.M1.Macrophages","cDCs","M0.Macrophages","M1.Macrophages","pDCs")

for (i in 1:length(idents.list)) {Antigen.Presenting <- SetIdent(Antigen.Presenting, cells = WhichCells(Antigen.Presenting, idents = idents.list[[i]]), value = Antigen.Presenting.Subtypes[[i]])}
Antigen.Presenting[["Cell.Subtypes"]] <- Idents(object = Antigen.Presenting)
DimPlot(Antigen.Presenting, label = T, repel = T) + NoLegend()
```

```{r}
Antigen.Presenting.List <- list()
for (i in 1:length(idents.list)) {Antigen.Presenting.List[[i]] <-  WhichCells(Antigen.Presenting, idents = Antigen.Presenting.Subtypes[[i]])}
Idents(OAC1) <- OAC1[["Cell.Subtypes"]]
for (i in 1:length(idents.list)) {OAC1 <- SetIdent(OAC1, cells = Antigen.Presenting.List[[i]], value = Antigen.Presenting.Subtypes[[i]])}

DimPlot(OAC1, reduction = "umap", label = T, repel = T, split.by = "tumour.normal") + NoLegend()
```

```{r}
OAC1[["Cell.Subtypes"]] <- Idents(object = OAC1)
```

Now the CAF monocle analysis
```{r}

library(monocle)
library(tidyverse)
library(reshape2)
library(Seurat)
library(Matrix)
library(rgl)
library(magick)
library(viridis)
library(ape)
library(sctransform)
library(ggrepel)
library(ggdendro)
library(tidyverse)
library(cowplot)
library(dropbead)
library("ggpubr")
library(ggbeeswarm)
library(beeswarm)
library(pheatmap)
library(cellwrangler)
```


```{r warning=F, message=F}
#rm(list=setdiff(ls(), "CAFs"))
OACM <- CAFs
OACM <- as.CellDataSet(OACM, reduction = "umap")
OACM  
```


```{r}
OACM <- estimateSizeFactors(OACM)
OACM <- estimateDispersions(OACM)
OACM <- detectGenes(OACM, min_expr = 0.1)
print(head(fData(OACM)))
cols=c("#A3A500","#00BF7D","#00B0F6","#F8766D", "#E76BF3")
cols <- structure(cols, names = levels(OACM@phenoData@data$Cell.Subtypes))

```

```{r}
expressed_genes <- row.names(subset(fData(OACM), num_cells_expressed >= 10))
```

```{r}
head(pData(OACM))
pData(OACM)$Total_mRNAs <- Matrix::colSums(exprs(OACM))
upper_bound <- 10^(mean(log10(pData(OACM)$Total_mRNAs)) +
                     2*sd(log10(pData(OACM)$Total_mRNAs)))
lower_bound <- 10^(mean(log10(pData(OACM)$Total_mRNAs)) -
                     2*sd(log10(pData(OACM)$Total_mRNAs)))
```

```{r}
qplot(Total_mRNAs, data = pData(OACM), color = Cell.Subtypes, geom =
        "density") +
  geom_vline(xintercept = lower_bound) +
  geom_vline(xintercept = upper_bound)
```


```{r}
pData(OACM)$CellType <- pData(OACM)$Cell.Subtypes
pie <- ggplot(pData(OACM), aes(x = factor(1), fill = factor(CellType))) + geom_bar(width = 1)
pie + coord_polar(theta = "y") + theme(axis.title.x = element_blank(), axis.title.y = element_blank())
```
From Monocle:
We recommend users order cells using genes selected by an unsupervised procedure called "dpFeature".
To use dpFeature, we first select superset of feature genes as genes expressed in at least 5% of all the cells.
```{r}
OACM <- detectGenes(OACM, min_expr = 0.1)
fData(OACM)$use_for_ordering <-
    fData(OACM)$num_cells_expressed > 0.05 * ncol(OACM)
```
Then we will perform a PCA analysis to identify the variance explained by each PC (principal component). We can look at a scree plot and determine how many pca dimensions are wanted based on whether or not there is a significant gap between that component and the component after it. By selecting only the high loading PCs, we effectively only focus on the more interesting biological variations.
```{r}
plot_pc_variance_explained(OACM, return_all = F)
```
We will then run reduceDimension with t-SNE as the reduction method on those top PCs and project them further down to two dimensions. 
I took 10 PCs because that is what I have used in the analysis in Seurat 

```{r}
OACM <- reduceDimension(OACM,
                              max_components = 2,
                              norm_method = 'log',
                              num_dim = 15,
                              reduction_method = 'tSNE',
                              verbose = T)

```
Then we can run density peak clustering to identify the clusters on the 2-D t-SNE space. The densityPeak algorithm clusters cells based on each cell's local density (Ρ) and the nearest distance (Δ) of a cell to another cell with higher distance. We can set a threshold for the Ρ, Δ and define any cell with a higher local density and distance than the thresholds as the density peaks. Those peaks are then used to define the clusters for all cells. By default, clusterCells choose 95% of Ρ and Δ to define the thresholds. We can also set a number of clusters (n) we want to cluster. In this setting, we will find the top n cells with high Δ with Δ among the top 50% range. The default setting often gives good clustering.
```{r}
OACM <- clusterCells(OACM, verbose = F)
```

```{r}
plot_cell_clusters(OACM, color_by = 'as.factor(Cluster)')
```

```{r}
plot_cell_clusters(OACM, color_by = 'as.factor(Cell.Subtypes)')
```
We also provide the decision plot for users to check the Ρ, Δ for each cell and decide the threshold for defining the cell clusters.
```{r}
plot_rho_delta(OACM, rho_threshold = 10, delta_threshold = 4 )
```
We could then re-run clustering based on the user defined threshold. To facilitate the computation, we can set (skip_rho_sigma = T) which enables us to skip the calculation of the Ρ, Σ.
```{r}
OACM <- clusterCells(OACM,
                 rho_threshold = 10,
                 delta_threshold = 4,
                 skip_rho_sigma = T,
                 verbose = F)
```

```{r}
plot_cell_clusters(OACM, color_by = 'as.factor(Cluster)')
```

```{r}
plot_cell_clusters(OACM, color_by = 'as.factor(Cell.Subtypes)')
```
After we confirm the clustering makes sense, we can then perform differential gene expression test as a way to extract the genes that distinguish them.
```{r}
clustering_DEG_genes <-
    differentialGeneTest(OACM[expressed_genes,],
          fullModelFormulaStr = '~Cell.Subtypes')
```
We will then select the top 1000 significant genes as the ordering genes.
```{r}
OACM_ordering_genes <-
    row.names(clustering_DEG_genes)[order(clustering_DEG_genes$qval)][1:1000]

OACM <- setOrderingFilter(OACM, ordering_genes = OACM_ordering_genes)

OACM <- reduceDimension(OACM, method = 'DDRTree')

OACM <- orderCells(OACM)

plot_cell_trajectory(OACM, color_by = "Cell.Subtypes", show_tree = T, show_branch_points = T)+ scale_color_manual(values = cols, name = "Cell.Subtypes")
```

```{r}
plot_cell_trajectory(OACM,cell_size = 0.5, color_by = "Cell.Subtypes", show_tree = T, show_branch_points = T, show_state_number = F) + labs(title = "CAF Subtypes",x = "Pseudotime", y = "") + theme(axis.text = element_blank(), axis.ticks = element_blank()) + NoLegend() + scale_color_manual(values = cols, name = "Cell.Subtypes")
```

```{r}
plot_cell_trajectory(OACM, color_by = "Pseudotime", show_tree = T, show_branch_points = F)+ scale_color_viridis(option = "C") + DarkTheme() + labs(title = "Monocle Pseudotime",x = "Monocle", y = "") + theme(legend.position = "top", legend.title = element_blank(), axis.text = element_blank(), axis.ticks = element_blank()) 
```


```{r}
save(OACM, OACM_ordering_genes, clustering_DEG_genes, pie, lower_bound, upper_bound, expressed_genes, file ="~/Desktop/Paper Seurat 3/Monocle.CAFs.Rdata")
#rm(OACM, OACM_ordering_genes, clustering_DEG_genes, pie, lower_bound, upper_bound, expressed_genes)
```


```{r}
# cluster the top 50 genes that vary as a function of pseudotime

clustering_DEG_genes %>% arrange(qval) %>% head(50) %>% select(gene_short_name) -> gene_to_cluster
gene_to_cluster <- gene_to_cluster$gene

my_pseudotime_cluster <- plot_pseudotime_heatmap(OACM[gene_to_cluster,], num_clusters = 5,cores = 4,show_rownames = TRUE,return_heatmap = TRUE, )

plot_pheatmap(my_pseudotime_cluster) # Works in Console only

```

```{r}
# cluster the top 50 genes that vary as a function of pseudotime

gene_to_cluster2 <- c(gene_to_cluster, "ACTA2", "PLAU", "PLAT", "CXCL6", "IL6", "CSF1", "DLL1", "PDE5A", "PDE2A", "THBS4")


my_pseudotime_cluster2 <- plot_pseudotime_heatmap(OACM[gene_to_cluster2,], num_clusters = 5,cores = 4,show_rownames = TRUE,return_heatmap = TRUE, )

plot_pheatmap(my_pseudotime_cluster2)

```

```{r}
pseudotime_monocle <-
    data.frame(
        Cell.type = phenoData(OACM)$Cell.Subtypes,
        pseudotime = phenoData(OACM)$Pseudotime,
        Class = phenoData(OACM)$Subsets
    )

ggplot(pseudotime_monocle, 
       aes(x = pseudotime, 
           y = Cell.type, colour = Cell.type))+
    geom_quasirandom(groupOnX = F) + theme_classic() +
    xlab("monocle pseudotime") + ylab("Cell types") +
    ggtitle("Cells ordered by monocle pseudotime")

ggplot(pseudotime_monocle)+ 
  geom_density(aes(x = pseudotime,fill = Cell.type,alpha =0.3,color=Cell.type))+
  theme_classic() +
    xlab("monocle pseudotime") + ylab("Cell types") +
    ggtitle("Cells ordered by monocle pseudotime")

ggplot(pseudotime_monocle) +
  geom_jitter(aes(pseudotime,Cell.type, color = Cell.type),height = 0.04, size = 0.6) + theme_classic()+ scale_color_manual(values = cols, name = "Cell.Subtypes")+ xlab("monocle pseudotime")
```

```{r}


plot_cell_trajectory(OACM,cell_size = 0.5, color_by = "Cell.Subtypes", show_tree = T, show_branch_points = F, show_state_number = F)+ scale_color_manual(values = cols, name = "Cell.Subtypes") + labs(title = "CAF Subtypes",x = "Pseudotime", y = "") + theme(axis.text = element_blank(), axis.ticks = element_blank()) + NoLegend() + facet_wrap(~Cell.Subtypes, nrow = 5)

ggplot(pseudotime_monocle) +
  geom_jitter(aes(pseudotime,Class, color = Cell.type),height = 0.04, size = 2) + theme_classic()+ scale_color_manual(values = cols, name = "Cell.Subtypes")+ xlab("monocle pseudotime")

plot_cell_trajectory(OACM,cell_size = 2, color_by = "Cell.Subtypes", show_tree = T, show_branch_points = F, show_state_number = F)+ scale_color_manual(values = cols, name = "Cell.Subtypes") + labs(title = "CAF Subtypes",x = "Pseudotime", y = "") + theme(axis.text = element_blank(), axis.ticks = element_blank()) + NoLegend() 

unique(pseudotime_monocle$Cell.type)
pseudotime_monocle <- pseudotime_monocle %>% arrange(factor(Cell.type, levels = c("Inflammatory.CAFS" , "Myofibroblast.CAFs", "Collagen.CAFs", "Thrombospondin.CAFs","MFAP5.Semaphorin.CAFs")))

```


```{r}
BEAM_res <- BEAM(OACM, branch_point = 1, cores = 4,verbose = T)
#error message: the condition has length > 1 and only the first element will be usedthe condition has length > 1 and only the first element will be used

BEAM_res <- BEAM_res[order(BEAM_res$qval),]

BEAM_res <- BEAM_res[,c("gene_short_name", "pval", "qval")]
```

```{r}
OACM.Beam.1 <- OACM[row.names(subset(BEAM_res,qval < 5e-2)),]
```

```{r}

#To store the pheatmap odject - Error in if (nrow(ancestor_exprs) == 1) exprs_data <- t(as.matrix(ancestor_exprs)) else exprs_data <- ancestor_exprs : argument is of length zero
Plot1 <- plot_genes_branched_heatmap(OACM.Beam.1,branch_point = 1,
                            num_clusters = 4,
                            cores = 4,
                            use_gene_short_name = TRUE,
                            show_rownames = TRUE,
                            return_heatmap = TRUE)
plot_pheatmap(Plot1)

#Use this to plot out the heatmap
plot_pheatmap(plot_genes_branched_heatmap(OACM.Beam.1,branch_point = 1,
                            num_clusters = 3,
                            cores = 4,
                            use_gene_short_name = TRUE,
                            show_rownames = TRUE,
                            return_heatmap = TRUE))
```

```{r}
BEAM_res <- BEAM(OACM, branch_point = 2, cores = 4)

BEAM_res <- BEAM_res[order(BEAM_res$qval),]

BEAM_res.filtered <- BEAM_res[,c("gene_short_name", "pval", "qval")]
```

```{r}
OACM.Beam.2 <- OACM[row.names(subset(BEAM_res.filtered,qval < 5e-2)),]
```

```{r}
#To store the pheatmap object
Plot2 <- plot_pheatmap(plot_genes_branched_heatmap(OACM.Beam.2,branch_point = 2,
                            num_clusters = 5,
                            cores = 4,
                            use_gene_short_name = TRUE,
                            show_rownames = F,
                            return_heatmap = TRUE,
                            branch_labels = c("MyoCAFs", "ColCAFs"), 
                            branch_colors = c("darkgrey", "#00B0F6", "#E76BF3")))


clusters <- Plot2$annotation_row

```

and monocle for the Myeloid cells
Creating a new dataset called APC in Monocle from the APCs

```{r warning=F, message=F}
APC.Seurat <- Antigen.Presenting.mod.clean.reclustered.tumour
rm(list=setdiff(ls(), "APC.Seurat"))
APC <- as.CellDataSet(APC.Seurat, reduction = "umap")
APC  
```


```{r}
#Estimate size factors and dispersions
APC <- estimateSizeFactors(APC)
APC <- estimateDispersions(APC)
#Filter low-quality cells
APC <- detectGenes(APC, min_expr = 0.1)
print(head(fData(APC)))
```

```{r}
expressed_genes <- row.names(subset(fData(APC), num_cells_expressed >= 10))
```

```{r}
head(pData(APC))
pData(APC)$Total_mRNAs <- Matrix::colSums(exprs(APC))
upper_bound <- 10^(mean(log10(pData(APC)$Total_mRNAs)) +
                     2*sd(log10(pData(APC)$Total_mRNAs)))
lower_bound <- 10^(mean(log10(pData(APC)$Total_mRNAs)) -
                     2*sd(log10(pData(APC)$Total_mRNAs)))
```

```{r}
qplot(Total_mRNAs, data = pData(APC), geom =
        "density", color = new.Antigen.Presenting.subsets) +
  geom_vline(xintercept = lower_bound) +
  geom_vline(xintercept = upper_bound)
```

```{r}
# Log-transform each value in the expression matrix.
L <- log(exprs(APC[expressed_genes,]))

# Standardize each gene, so that they are all on the same scale,
# Then melt the data with plyr so we can plot it easily
melted_dens_df <- melt(Matrix::t(scale(Matrix::t(L))))

# Plot the distribution of the standardized gene expression values.
qplot(value, geom = "density", data = melted_dens_df) +
stat_function(fun = dnorm, size = 0.5, color = 'red') +
xlab("Standardized log(FPKM)") +
ylab("Density")
```


```{r}
pData(APC)$CellType <- pData(APC)$new.Antigen.Presenting.subsets
pie <- ggplot(pData(APC), aes(x = factor(1), fill = factor(CellType))) + geom_bar(width = 1)
pie + coord_polar(theta = "y") + theme(axis.title.x = element_blank(), axis.title.y = element_blank())
```
From Monocle:
We recommend users order cells using genes selected by an unsupervised procedure called "dpFeature".
To use dpFeature, we first select superset of feature genes as genes expressed in at least 5% of all the cells.
```{r}
APC <- detectGenes(APC, min_expr = 0.1)
fData(APC)$use_for_ordering <-
    fData(APC)$num_cells_expressed > 0.05 * ncol(APC)
```
Then we will perform a PCA analysis to identify the variance explained by each PC (principal component). We can look at a scree plot and determine how many pca dimensions are wanted based on whether or not there is a significant gap between that component and the component after it. By selecting only the high loading PCs, we effectively only focus on the more interesting biological variations.
```{r}
plot_pc_variance_explained(APC, return_all = F)
```
We will then run reduceDimension with t-SNE as the reduction method on those top PCs and project them further down to two dimensions. 
I took 20 PCs because thatis what I have used in the analysis in Seurat for the Antigen Presenting Cell datset 

```{r}
APC <- reduceDimension(APC,
                              max_components = 2,
                              norm_method = 'log',
                              num_dim = 20,
                              reduction_method = 'tSNE',
                              verbose = T)

```
Then we can run density peak clustering to identify the clusters on the 2-D t-SNE space. The densityPeak algorithm clusters cells based on each cell's local density (Ρ) and the nearest distance (Δ) of a cell to another cell with higher distance. We can set a threshold for the Ρ, Δ and define any cell with a higher local density and distance than the thresholds as the density peaks. Those peaks are then used to define the clusters for all cells. By default, clusterCells choose 95% of Ρ and Δ to define the thresholds. We can also set a number of clusters (n) we want to cluster. In this setting, we will find the top n cells with high Δ with Δ among the top 50% range. The default setting often gives good clustering.
```{r}
APC <- clusterCells(APC, verbose = F)
```

```{r}
plot_cell_clusters(APC, color_by = 'as.factor(Cluster)')
```

```{r}
plot_cell_clusters(APC, color_by = 'as.factor(new.Antigen.Presenting.subsets)')
```
We also provide the decision plot for users to check the Ρ, Δ for each cell and decide the threshold for defining the cell clusters.
```{r}
plot_rho_delta(APC, rho_threshold = 30, delta_threshold = 4 )
```
We could then re-run clustering based on the user defined threshold. To facilitate the computation, we can set (skip_rho_sigma = T) which enables us to skip the calculation of the Ρ, Σ.
```{r}
APC <- clusterCells(APC,
                 rho_threshold = 13,
                 delta_threshold = 4,
                 skip_rho_sigma = T,
                 verbose = F)
```

```{r}
plot_cell_clusters(APC, color_by = 'as.factor(Cluster)')
```

```{r}
plot_cell_clusters(APC, color_by = 'as.factor(new.Antigen.Presenting.subsets)')
```
After we confirm the clustering makes sense, we can then perform differential gene expression test as a way to extract the genes that distinguish them.
```{r}
clustering_DEG_genes <- differentialGeneTest(APC[expressed_genes,], fullModelFormulaStr = '~new.Antigen.Presenting.subsets',cores = 3)
alt_clustering_DEG_genes <- FindAllMarkers(APC.Seurat, only.pos = T, logfc.threshold = 0.2)
```

```{r}
GM_state <- function(cds){
  if (length(unique(pData(cds)$State)) > 1){
    T0_counts <- table(pData(cds)$State,
                       pData(cds)$new.Antigen.Presenting.subsets)[,"Monocytes.CD14"]
    return(as.numeric(names(T0_counts)[which
          (T0_counts == max(T0_counts))]))
  } else {
    return (1)
  }
}
```

We will then select the top X significant genes as the ordering genes.
```{r}

filtered_clustering_DEG_genes <- subset(clustering_DEG_genes,qval < 1e-4 & use_for_ordering =="TRUE")

APC_ordering_genes <- row.names(filtered_clustering_DEG_genes)[order(filtered_clustering_DEG_genes$qval)][1:268]

APC <- setOrderingFilter(APC, ordering_genes = APC_ordering_genes)

APC <- reduceDimension(APC, method = 'DDRTree', max_components = 2,num_dim=20)

APC <- orderCells(APC)

APC <- orderCells(APC, root_state = GM_state(APC))
plot_cell_trajectory(APC, color_by = "Pseudotime",show_branch_points = F) +scale_color_viridis(option = "cividis")
plot_cell_trajectory(APC, color_by = "Cluster",show_branch_points = F)
plot_cell_trajectory(APC, color_by = "new.Antigen.Presenting.subsets",show_branch_points = F)
```


```{r}

filtered_alt_clustering_DEG_genes <- subset(alt_clustering_DEG_genes, p_val_adj < 0.05)

alt_APC_ordering_genes <-row.names(filtered_alt_clustering_DEG_genes)[order(filtered_alt_clustering_DEG_genes$avg_logFC)][1:344]

APC <- setOrderingFilter(APC, ordering_genes = alt_APC_ordering_genes)

APC <- reduceDimension(APC, method = 'DDRTree', max_components = 2,num_dim=20)

APC <- orderCells(APC, root_state = GM_state(APC))
plot_cell_trajectory(APC, color_by = "Pseudotime",theta = 180,show_branch_points = F) +scale_color_viridis(option = "cividis")
plot_cell_trajectory(APC, color_by = "Cluster",theta = 180,show_branch_points = F)
plot_cell_trajectory(APC, color_by = "new.Antigen.Presenting.subsets",theta = 180,show_branch_points = F)
```

Creating a new dataset called MACs in Monocle from just the MACs

```{r warning=F, message=F}
#new cell type ids: "InfDC.cDC2.CD1C","remove","Monocytes.CD14","pDC.DC6.LAMP3","cDC1.CLEC9A","M1.like.macrophages","M2.like.macrophages","Monocytes.CD14.CD16"
MACs.Seurat <- subset(APC.Seurat, idents = c("InfDC.cDC2.CD1C","pDC.DC6.LAMP3","cDC1.CLEC9A"), invert = T)

MACs <- as.CellDataSet(MACs.Seurat, reduction = "umap")

MACs  
```


```{r}
MACs <- estimateSizeFactors(MACs)
MACs <- estimateDispersions(MACs)
MACs <- detectGenes(MACs, min_expr = 0.1)
print(head(fData(MACs)))
```

```{r}
expressed_genes <- row.names(subset(fData(MACs), num_cells_expressed >= 10))
```

```{r}
head(pData(MACs))
pData(MACs)$Total_mRNAs <- Matrix::colSums(exprs(MACs))
upper_bound <- 10^(mean(log10(pData(MACs)$Total_mRNAs)) +
                     2*sd(log10(pData(MACs)$Total_mRNAs)))
lower_bound <- 10^(mean(log10(pData(MACs)$Total_mRNAs)) -
                     2*sd(log10(pData(MACs)$Total_mRNAs)))
```

```{r}
qplot(Total_mRNAs, data = pData(MACs), geom =
        "density", color=new.Antigen.Presenting.subsets) +
  geom_vline(xintercept = lower_bound) +
  geom_vline(xintercept = upper_bound)
```


```{r}
pData(MACs)$CellType <- pData(MACs)$new.Antigen.Presenting.subsets
pie <- ggplot(pData(MACs), aes(x = factor(1), fill = factor(CellType))) + geom_bar(width = 1)
pie + coord_polar(theta = "y") + theme(axis.title.x = element_blank(), axis.title.y = element_blank())
```
From Monocle:
We recommend users order cells using genes selected by an unsupervised procedure called "dpFeature".
To use dpFeature, we first select superset of feature genes as genes expressed in at least 5% of all the cells.
```{r}
MACs <- detectGenes(MACs, min_expr = 0.1)
fData(MACs)$use_for_ordering <-
    fData(MACs)$num_cells_expressed > 0.05 * ncol(MACs)
```
Then we will perform a PCA analysis to identify the variance explained by each PC (principal component). We can look at a scree plot and determine how many pca dimensions are wanted based on whether or not there is a significant gap between that component and the component after it. By selecting only the high loading PCs, we effectively only focus on the more interesting biological variations.
```{r}
plot_pc_variance_explained(MACs, return_all = F)
```
We will then run reduceDimension with t-SNE as the reduction method on those top PCs and project them further down to two dimensions. 
I took 15 PCs based on the Scree Plot above. 

```{r}
MACs <- reduceDimension(MACs,
                              max_components = 2,
                              norm_method = 'log',
                              num_dim = 15,
                              reduction_method = 'tSNE',
                              verbose = T)

```
Then we can run density peak clustering to identify the clusters on the 2-D t-SNE space. The densityPeak algorithm clusters cells based on each cell's local density (Ρ) and the nearest distance (Δ) of a cell to another cell with higher distance. We can set a threshold for the Ρ, Δ and define any cell with a higher local density and distance than the thresholds as the density peaks. Those peaks are then used to define the clusters for all cells. By default, clusterCells choose 95% of Ρ and Δ to define the thresholds. We can also set a number of clusters (n) we want to cluster. In this setting, we will find the top n cells with high Δ with Δ among the top 50% range. The default setting often gives good clustering.
```{r}
MACs <- clusterCells(MACs, verbose = F)
```

```{r}
plot_cell_clusters(MACs, color_by = 'as.factor(Cluster)')
```

```{r}
plot_cell_clusters(MACs, color_by = 'as.factor(new.Antigen.Presenting.subsets)')
```
We also provide the decision plot for users to check the Ρ, Δ for each cell and decide the threshold for defining the cell clusters.
```{r}
plot_rho_delta(MACs, rho_threshold = 30, delta_threshold = 4 )
```
We could then re-run clustering based on the user defined threshold. To facilitate the computation, we can set (skip_rho_sigma = T) which enables us to skip the calculation of the Ρ, Σ.
```{r}
MACs <- clusterCells(MACs,
                 rho_threshold = 9,
                 delta_threshold = 9,
                 skip_rho_sigma = T,
                 verbose = F)
```

```{r}
plot_cell_clusters(MACs, color_by = 'as.factor(Cluster)')
```

```{r}
plot_cell_clusters(MACs, color_by = 'as.factor(new.Antigen.Presenting.subsets)')
```
After we confirm the clustering makes sense, we can then perform differential gene expression test as a way to extract the genes that distinguish them.
```{r}
MACs_clustering_DEG_genes <- differentialGeneTest(MACs[expressed_genes,], fullModelFormulaStr = '~new.Antigen.Presenting.subsets',cores = 3)
alt_MACs_clustering_DEG_genes <- FindAllMarkers(MACs.Seurat, only.pos = T, logfc.threshold = 0.2)
```
We will then select the top X significant genes as the ordering genes.
```{r}
filtered_MACs_clustering_DEG_genes <- subset(MACs_clustering_DEG_genes,qval < 1e-4 & use_for_ordering =="TRUE")

MACs_ordering_genes <- row.names(filtered_MACs_clustering_DEG_genes)[order(filtered_MACs_clustering_DEG_genes$qval)][1:103]

MACs <- setOrderingFilter(MACs, ordering_genes = MACs_ordering_genes)

MACs <- reduceDimension(MACs, method = 'DDRTree', max_components = 2,num_dim=15)

MACs <- orderCells(MACs)

MACs <- orderCells(MACs, root_state = GM_state(MACs))
plot_cell_trajectory(MACs, color_by = "Pseudotime",theta = 135,show_branch_points = F) +scale_color_viridis(option = "cividis")
plot_cell_trajectory(MACs, color_by = "Cluster",theta = 135,show_branch_points = F)
plot_cell_trajectory(MACs, color_by = "new.Antigen.Presenting.subsets",theta = 135,show_branch_points = F)
```


```{r}

filtered_alt_MACs_clustering_DEG_genes <- subset(alt_MACs_clustering_DEG_genes, p_val_adj < 0.05)

alt_MACs_ordering_genes <-row.names(filtered_alt_MACs_clustering_DEG_genes)[order(filtered_alt_MACs_clustering_DEG_genes$avg_logFC)][1:91]

MACs <- setOrderingFilter(MACs, ordering_genes = alt_MACs_ordering_genes)

MACs <- reduceDimension(MACs, method = 'DDRTree', max_components = 2,num_dim=15)

MACs <- orderCells(MACs, root_state = GM_state(MACs))

plot_cell_trajectory(MACs, color_by = "Pseudotime",theta = 135,show_branch_points = F) +scale_color_viridis(option = "cividis")
plot_cell_trajectory(MACs, color_by = "Cluster",theta = 135,show_branch_points = F)
plot_cell_trajectory(MACs, color_by = "new.Antigen.Presenting.subsets",theta = 135,show_branch_points = T)

plot_cell_trajectory(MACs, color_by = "Pseudotime",show_branch_points = T) +scale_color_viridis(option = "cividis")
plot_cell_trajectory(MACs, color_by = "new.Antigen.Presenting.subsets",show_branch_points = T)

```

```{r}
# cluster the top 50 genes that vary as a function of pseudotime
filtered_alt_MACs_clustering_DEG_genes %>% arrange(p_val_adj) %>% head(50) %>% select(gene) -> gene_to_cluster
gene_to_cluster <- gene_to_cluster$gene

my_pseudotime_cluster <- plot_pseudotime_heatmap(MACs[gene_to_cluster,], num_clusters = 5,cores = 4,show_rownames = TRUE,return_heatmap = TRUE)

plot_pheatmap(my_pseudotime_cluster) # Works in Console only

```

```{r}
pseudotime_monocle <-
    data.frame(
        Cell.type = phenoData(MACs)$new.Antigen.Presenting.subsets,
        pseudotime = phenoData(MACs)$Pseudotime,
        Class = phenoData(MACs)$Subsets
    )

ggplot(pseudotime_monocle, 
       aes(x = pseudotime, 
           y = Cell.type, colour = Cell.type))+
    geom_quasirandom(groupOnX = F) + theme_classic() +
    xlab("monocle pseudotime") + ylab("Cell types") +
    ggtitle("Cells ordered by monocle pseudotime")

ggplot(pseudotime_monocle)+ 
  geom_density(aes(x = pseudotime,fill = Cell.type,alpha =0.3,color=Cell.type))+
  theme_classic() +
    xlab("monocle pseudotime") + ylab("Cell types") +
    ggtitle("Cells ordered by monocle pseudotime")

ggplot(pseudotime_monocle) +
  geom_jitter(aes(pseudotime,Class, color = Cell.type),height = 0.02) + theme_classic() +
    xlab("monocle pseudotime")


```


```{r}
BEAM_res <- BEAM(MACs, branch_point = 1, cores = 4,verbose = T)
#error message: the condition has length > 1 and only the first element will be usedthe condition has length > 1 and only the first element will be used

BEAM_res <- BEAM_res[order(BEAM_res$qval),]

BEAM_res <- BEAM_res[,c("gene_short_name", "pval", "qval")]
```

```{r}
MACs.Beam.1 <- MACs[row.names(subset(BEAM_res,qval < 5e-2)),]
```

```{r}

#To store the pheatmap odject - Error in if (nrow(ancestor_exprs) == 1) exprs_data <- t(as.matrix(ancestor_exprs)) else exprs_data <- ancestor_exprs : argument is of length zero
Plot1 <- plot_genes_branched_heatmap(MACs.Beam.1,branch_point = 1,
                            num_clusters = 4,
                            cores = 4,
                            use_gene_short_name = TRUE,
                            show_rownames = TRUE,
                            return_heatmap = TRUE)

#Use this to plot out the heatmap
plot_pheatmap(plot_genes_branched_heatmap(MACs.Beam.1,branch_point = 1,
                            num_clusters = 3,
                            cores = 4,
                            use_gene_short_name = TRUE,
                            show_rownames = TRUE,
                            return_heatmap = TRUE))
```

```{r}
BEAM_res <- BEAM(MACs, branch_point = 2, cores = 4)

BEAM_res <- BEAM_res[order(BEAM_res$qval),]

BEAM_res <- BEAM_res[,c("gene_short_name", "pval", "qval")]
```

```{r}
MACs.Beam.2 <- MACs[row.names(subset(BEAM_res,qval < 5e-2)),]
```

```{r}
#To store the pheatmap odject
Plot1 <- plot_genes_branched_heatmap(MACs.Beam.2,branch_point = 2,
                            num_clusters = 4,
                            cores = 4,
                            use_gene_short_name = TRUE,
                            show_rownames = TRUE,
                            return_heatmap = TRUE)

#Use this to plot out the heatmap
plot_pheatmap(plot_genes_branched_heatmap(MACs.Beam.2,branch_point = 2,
                            num_clusters = 5,
                            cores = 4,
                            use_gene_short_name = TRUE,
                            show_rownames = TRUE,
                            return_heatmap = TRUE))

#why collagen/dcn expression in macrophages: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5029092/ -Peritoneal macrophages and septic inflammation?
#https://www.nature.com/articles/s41467-019-14263-2 - macrophages expressing collagens fibrotic signature following cardiac injury.
#https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3776354/ - M2 macrophages and collagen turnover
#https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6150476/ - IGM expressing TAMs
```

```{r}
BEAM_res <- BEAM(MACs, branch_point = 3, cores = 4)

BEAM_res <- BEAM_res[order(BEAM_res$qval),]

BEAM_res <- BEAM_res[,c("gene_short_name", "pval", "qval")]
```

```{r}
MACs.Beam.3 <- MACs[row.names(subset(BEAM_res,qval < 5e-2)),]
```

```{r}
#To store the pheatmap odject
Plot1 <- plot_genes_branched_heatmap(MACs.Beam.3,branch_point = 3,
                            num_clusters = 4,
                            cores = 4,
                            use_gene_short_name = TRUE,
                            show_rownames = TRUE,
                            return_heatmap = TRUE)

#Use this to plot out the heatmap
plot_pheatmap(plot_genes_branched_heatmap(MACs.Beam.3,branch_point = 3,
                            num_clusters = 5,
                            cores = 4,
                            use_gene_short_name = TRUE,
                            show_rownames = TRUE,
                            return_heatmap = TRUE))


```


```{r}

MAC_genes <- row.names(subset(fData(MACs),
          gene_short_name %in% c("MMP12", "S100A8", "CCL4")))

plot_genes_branched_pseudotime(MACs[MAC_genes,],
                       branch_point = 3,
                       color_by = "new.Antigen.Presenting.subsets",
                       ncol = 1)

```


The code below is for analysis of the cell line analysis of PDE5 treated CAFs in co-culture with MFD1 OAC cells.


Now we create the seurat object with a minimum of 200 genes(features) per cell and genes expressed in a minimum of 5 cells. We use the metrics file as meta data in the seurat object.
```{r}
PDE5 <- CreateSeuratObject(counts = read.table("~/Desktop/PDE5/Annettes_merged_2.dge.txt.gz", sep="\t", header=TRUE, row.names=1), min.cells = 5, min.features = 200, 
                           #meta.data = read.table("~/Desktop/PDE5/PDE5.meta.data.csv", header = T, sep = ",", row.names = 1)
)

```

Now load a list of housekeeping genes.
```{r}
housekeepers <- c("ACTB","B2M","HNRPLL","HPRT","PSMB2","PSMB4","PPIA","PRPS1","PRPS1L1","PRPS1L3","PRPS2","PRPSAP1","PRPSAP2","RPL10","RPL10A","RPL10L","RPL11","RPL12","RPL13","RPL14","RPL15","RPL17","RPL18","RPL19","RPL21","RPL22","RPL22L1","RPL23","RPL24","RPL26","RPL27","RPL28","RPL29","RPL3","RPL30","RPL32","RPL34","RPL35","RPL36","RPL37","RPL38","RPL39","RPL39L","RPL3L","RPL4","RPL41","RPL5","RPL6","RPL7","RPL7A","RPL7L1","RPL8","RPL9","RPLP0","RPLP1","RPLP2","RPS10","RPS11","RPS12","RPS13","RPS14","RPS15","RPS15A","RPS16","RPS17","RPS18","RPS19","RPS20","RPS21","RPS24","RPS25","RPS26","RPS27","RPS27A","RPS27L","RPS28","RPS29","RPS3","RPS3A","RPS4X","RPS5","RPS6","RPS6KA1","RPS6KA2","RPS6KA3","RPS6KA4","RPS6KA5","RPS6KA6","RPS6KB1","RPS6KB2","RPS6KC1","RPS6KL1","RPS7","RPS8","RPS9","RPSA","TRPS1","UBB")
```
Identify which of those genes are in the seurat object.
```{r}
housekeepers.genes.PDE5 <- housekeepers[housekeepers %in% rownames(x= PDE5)]
```
Calculate for each cell the percentage of genes that have come from the housekeeper list.
```{r}
PDE5[["percent.housekeepers"]] <- PercentageFeatureSet(PDE5, features = housekeepers.genes.PDE5)
```
Then plot a histogram
```{r}
ggplot(PDE5@meta.data) + geom_histogram(aes(percent.housekeepers, fill = ..count..), bins = 1000) + scale_fill_viridis_c("Count", option = "plasma") + labs(title = "PDE5") + xlab("Percentage Housekeeper Gene Content") + ylab("Cell Count")+ xlim(0,30) + ylim(0,15)
```

Add in a dissociation genelist
```{r}
dissociation <- c("ACTG1","ANKRD1","ARID5A","ATF3","ATF4","BAG3","BHLHE40","BRD2","BTG1","BTG2","CCNL1","CCRN4L","CEBPB","CEBPD","CEBPG","CSRNP1","CXCL1","CYR61","DCN","DDX3X","DDX5","DES","DNAJA1","DNAJB1","DNAJB4","DUSP1","DUSP8","EGR1","EGR2","EIF1","EIF5","ERF","ERRFI1","FAM132B","FOS","FOSB","FOSL2","GADD45A","GCC1","GEM","H3F3B","HIPK3","HSP90AA1","HSP90AB1","HSPA1A","HSPA1B","HSPA5","HSPA8","HSPB1","HSPH1","ID3","IDI1","IER2","IER3","IFRD1","IL6","IRF1","IRF8","ITPKC","JUN","JUNB","JUND","KLF2","KLF4","KLF6","KLF9","LITAF","LMNA","MAFF","MAFK","MCL1","MIDN","MT1G","MT1B","MT1E","MT1F","MT1H","MT1A","MT1M","MT1X","MT1HL1","MT2","MYADM","MYC","MYD88","NCKAP5L","NCOA7","NFKBIA","NFKBIZ","NOP58","NPPC","NR4A1","ODC1","OSGIN1","OXNAD1","PCF11","PDE4B","PER1","PHLDA1","PNP","PNRC1","PPP1CC","PPP1R15A","PXDC1","RAP1B","RASSF1","RHOB","RHOH","RIPK1","SAT1","SBNO2","SDC4","SERPINE1","SKIL","SLC10A6","SLC38A2","SLC41A1","SOCS3","SQSTM1","SRF","SRSF5","SRSF7","STAT3","TAGLN2","TIPARP","TNFAIP3","TNFAIP6","TPM3","TPPP3","TRA2A","TRA2B","TRIB1","TUBB4B","TUBB6","UBC","USP2","WAC","ZC3H12A","ZFAND5","ZFP36","ZFP36L1","ZFP36L2","ZYX","GADD45G","HSPE1","IER5","KCNE4")
```

Find dissociaiton genes in the genes kept in the Seurat object and score each cell as above.
```{r}
dissociation.genes.PDE5 <- dissociation[dissociation %in% rownames(x= PDE5)]
PDE5[["percent.dissociation"]] <- PercentageFeatureSet(PDE5, features = dissociation.genes.PDE5)
```

Calculate what percentage of the cells will be annotated as dissociation-affected cells with cutoff value 10
```{r}
DissociationCutoff <- 10
PDE5@meta.data$Dissociation_affected <- NULL
PDE5@meta.data$Dissociation_affected <- ifelse(PDE5@meta.data$percent.dissociation >= DissociationCutoff,1,0)
PercentageDissociationAffected <- round((nrow(PDE5@meta.data[PDE5@meta.data$percent.dissociation >= DissociationCutoff, ]))/nrow(PDE5@meta.data), digits = 2)
print(c("Percentage of PDE5 cells annotated as dissociation-affected cells is", PercentageDissociationAffected))
```

Plot a histogram 
```{r}
p1 <- ggplot(PDE5@meta.data) + geom_histogram(aes(percent.dissociation, fill = ..count..), bins = 1000) + scale_fill_viridis_c("Count", option = "plasma") + geom_vline(xintercept = c(DissociationCutoff), color = c("black"), show.legend = T) + labs(title = "PDE5") + xlab("Percentage Dissociation Gene Content") + ylab("Cell Count")+ xlim(0,15) + ylim(0,15)

p1
```

Calculate Mitochondrial percentage
```{r}
PDE5[["percent.mito"]] <- PercentageFeatureSet(PDE5, pattern = "^MT-")
```
Calculate what percentage of the cells will be annotated as dying cells with cutoff value 25
```{r}
Dying.Cell.Cutoff <- 10
PDE5@meta.data$Dying<- NULL
PDE5@meta.data$Dying <- ifelse(PDE5@meta.data$percent.mito >= Dying.Cell.Cutoff,1,0)
PercentageDying <- round((nrow(PDE5@meta.data[PDE5@meta.data$percent.mito >= Dying.Cell.Cutoff, ]))/(nrow(PDE5@meta.data))*100, digits = 2)
print(c("Percentage of PDE5 cells annotated as dying cells is", PercentageDying))
```
Visualise the data with a violin plot
```{r}
VlnPlot(PDE5, features = "percent.mito", pt.size = 0, sort = T)+ geom_hline(yintercept = c(Dying.Cell.Cutoff), color = c("black")) + labs(x = "", y = "Percent Mitochondrial Content",  title = "Percent Mitochondrial Content per Cell") + theme(axis.title.x = element_text(color = "black"), axis.title.y=element_text(color= "black")) + geom_boxplot(width =0.2, fill = "grey", alpha = 0.5)
```
Quick Tidy
```{r}
rm(PercentageDissociationAffected, DissociationCutoff, PercentageDying, Dying.Cell.Cutoff, dissociation, dissociation.genes.PDE5, housekeepers, housekeepers.genes.PDE5, p1)
```
Calculate mean, median and quantiles for number of genes per cell and plot as a Violin plot as well. 
```{r}
median <- median(PDE5@meta.data$nFeature_RNA)
mean <- mean(PDE5@meta.data$nFeature_RNA)

quantile.1 <- quantile(PDE5@meta.data$nFeature_RNA, 1/4)
quantile.3 <- quantile(PDE5@meta.data$nFeature_RNA, 3/4)

p3 <- VlnPlot(PDE5, features = "nFeature_RNA", pt.size = 0) + geom_hline(yintercept = c(median, quantile.1, quantile.3)) + labs(x = "", y = "Counts",  title = "Genes per Cell PDE5") + theme(axis.title.x = element_text(color = "black"), axis.title.y=element_text(color= "black"))

p3
```

Add cell cycle scores to each cell from (Tirosh et al.2015)
```{r}
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes
PDE5 <- CellCycleScoring(object = PDE5, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE, nbin = 18)
```
Tidy up
```{r}
rm(s.genes, g2m.genes)
```
Subset the data filtering based on what we have observed.
```{r}
PDE5 <- subset(PDE5, subset = nFeature_RNA > 200 & nFeature_RNA < 7500 & percent.mito < 10)
Idents(PDE5) <- "orig.ident"
table(PDE5@active.ident)
```
Re-calculate median, mean and quantiles.
```{r}
median1 <- median(PDE5@meta.data$nFeature_RNA)

mean1 <- mean(PDE5@meta.data$nFeature_RNA)

quantile1.1 <- quantile(PDE5@meta.data$nFeature_RNA, 1/4)
quantile1.3 <- quantile(PDE5@meta.data$nFeature_RNA, 3/4)

```
Set Cut off's for 4 nGene Groups based on median and quartiles
```{r}
PDE5.25 <- WhichCells(PDE5,  expression = (quantile1.1+1) > nFeature_RNA & nFeature_RNA > -Inf)

PDE5.50 <- WhichCells(PDE5,  expression = (median1+1) > nFeature_RNA & nFeature_RNA > quantile1.1)

PDE5.75 <- WhichCells(PDE5,  expression = (quantile1.3+1) > nFeature_RNA & nFeature_RNA > median1)

PDE5.100 <- WhichCells(PDE5, expression = Inf > nFeature_RNA & nFeature_RNA > quantile1.3)

PDE5 <-SetIdent(PDE5, cells =PDE5.25, value = "1st")  
PDE5 <-SetIdent(PDE5, cells =PDE5.50, value = "2nd")
PDE5 <-SetIdent(PDE5, cells =PDE5.75, value = "3rd")
PDE5 <-SetIdent(PDE5, cells =PDE5.100, value = "4th")
PDE5[["nGene.Groups"]] <- Idents(object = PDE5)
```

```{r}
rm(PDE5.25, PDE5.50, PDE5.75, PDE5.100)
```

Normalise using scTransform, here scaled on Mitotic phase
```{r, message=FALSE, warning=FALSE}
Idents(PDE5) <- "orig.ident"
PDE5 <- RenameIdents(PDE5, "CAF" = "PDE5i", "CAFv" = "Control", "MC" = "PDE5i", "MCv" = "Control", "MFD1"= "PDE5i", "MFD1v" = "Control" )
PDE5[["stim"]] <- Idents(object = PDE5)
PDE5.list <- SplitObject(PDE5, split.by = "stim")
PDE5.list <- lapply(X = PDE5.list, FUN = function(x) {
    x <- SCTransform(x, verbose = FALSE, variable.features.n = 2000, vars.to.regress = "Phase")
})
```

```{r}
PDE5.features <- SelectIntegrationFeatures(object.list = PDE5.list, nfeatures = 3000)
PDE5.list <- PrepSCTIntegration(object.list = PDE5.list, anchor.features = PDE5.features, 
    verbose = FALSE)
PDE5.anchors <- FindIntegrationAnchors(object.list = PDE5.list,normalization.method = "SCT", anchor.features = PDE5.features,)
rm(PDE5)
PDE5 <- IntegrateData(anchorset = PDE5.anchors,normalization.method = "SCT")
```
Then straight into principle component analysis(PCA).
```{r}
PDE5 <- RunPCA(PDE5, npcs = 20)
```


Then plot the first principle component
```{r}
p1 <- DimHeatmap(PDE5, dims = 1, cells = 100, balanced = TRUE, fast = F)

p1
```
And see which principle components are contributing the most to the variability.
```{r,fig.align = "center"}
p2 <- ElbowPlot(PDE5, ndims = 20)

p2
```
We then use the PCA's as input to the FindNeighbours function which constructs a Shared-neares-neighbour graph. First by determining the k-nearest neighbors of each cell. Then using this knn graph to construct the SNN graph by calculating the neighborhood overlap (Jaccard index) between every cell and its k.param nearest neighbors.
```{r}
PDE5 <- FindNeighbors(PDE5, dims = 1:10)
```
Then we cluster the cells based on the Louvain algorithm for identifying highly interconnected communities in large complex networks. It was developed and tested on 2 million Belgian mobile phone customers at the univerity of Louvain in Belgium. The best modularity is determined using an opitimisation algorithm by Waltman and van Eck and we use a number of different resolutions to see for ourselves which resolution works best.
```{r}
PDE5 <- FindClusters(PDE5, resolution = c(0.5:2.5))
```
In order to display this highly dimensional data in a 2d image we perform dimensionality reduction usin the Uniform Manifold Approximation and Projection (UMAP) technique. This technique preserves the global structure and the local interconnection of points. 
```{r}
PDE5 <- RunUMAP(PDE5, dims = 1:10, seed.use = 111)
```

```{r}
p1 <- DimPlot(PDE5, reduction = "umap", group.by = "seurat_clusters", pt.size = 2, label = T) +NoLegend()

plot_grid(p1, ncol = 1, labels = "PDE5 with resolution of 2.5")
```
T-distributed stochastic neighbour embedding (t-SNE) is an alternative dimensionality reduction technique which is good at identifying local interconection it is poor at illustrating global structure and so points that are near on the graph may not represent near neighbours. I don't use anymore
```{r}
#PDE5 <- RunTSNE(PDE5, dims = 1:10, seed.use = 111)
#p1 <- DimPlot(PDE5, reduction = "tsne", group.by = "seurat_clusters", pt.size = 2)

#plot_grid(p1, ncol = 1, labels = "tSNE of PDE5 with resolution of 2.5")
```

```{r}
Idents(PDE5) <- "integrated_snn_res.0.5"
p1 <- DimPlot(PDE5, reduction = "umap", group.by = "integrated_snn_res.0.5", pt.size = 2, label = T)

plot_grid(p1, ncol = 1, labels = "UMAP of PDE5 with resolution of 0.5")
```
We then look at differential gene expression between the clusters at the current resolution. The default test for differential expression is the wilcoxon rank sum test (identical to the Mann-Whitney U test). Using the FindAllMarkers function we are comparing each cluster with all other clusters.
```{r}
DefaultAssay(PDE5) <- "RNA"
Idents(PDE5) <- "integrated_snn_res.0.5"
PDE5.Markers.0.5 <- FindAllMarkers(PDE5, min.pct = 0.25, logfc.threshold = 0.25, verbose = F, only.pos = T) 
```

We then look at the top 10 markers by the average log fold change in that gene in that cluster and plot a heatmap.
```{r, message=TRUE, warning=TRUE}
DefaultAssay(PDE5) <- "SCT"
top10 <- PDE5.Markers.0.5 %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
DoHeatmap(PDE5, features = top10$gene) + NoLegend()
```

```{r}
DoHeatmap(PDE5, features = top10$gene, group.by = "orig.ident") + NoLegend()
```

Then using the top ten genes and our knowledge we can then classify the cells. 
We then set a new identity within the Seurat object based on our new labels.
```{r}
Idents(PDE5) <- "integrated_snn_res.0.5"
idents.list = as.character(0:7)
Cell.Types = list("Fibroblasts","Cancer","Cancer","Cancer","Fibroblasts","Fibroblasts","Cancer","Fibroblasts")
for (i in 1:length(idents.list)) {PDE5 <- SetIdent(PDE5, cells = WhichCells(PDE5, idents = idents.list[[i]]), value = Cell.Types[[i]])}
PDE5[["Cell.Types"]] <- Idents(PDE5)


Idents(PDE5) <- "orig.ident"
idents.list <- list("MFD1v", "CAFv",  "MCv",   "MFD1", "CAF",  "MC")
Cell.Culture = list("Cancer","Fibroblasts","Mixed","Cancer","Fibroblasts","Mixed")


for (i in 1:length(idents.list)) {PDE5 <- SetIdent(PDE5, cells = WhichCells(PDE5, idents = idents.list[[i]]), value = Cell.Culture[[i]])}
PDE5[["Cell.Culture"]] <- Idents(PDE5)

```

```{r}
colnames(PDE5@meta.data)
```

```{r}
p1 <- DimPlot(PDE5, pt.size = 0.5, label = F, group.by = "Phase", label.size = 4) +theme(axis.title.y = element_blank()) +labs(x = "Cell Cycle Phase") 
p2 <- DimPlot(PDE5, pt.size = 0.5, label = F, group.by = "Cell.Types", label.size = 4) +theme(axis.title.y = element_blank()) +labs(x = "Cell Types") 
"integrated_snn_res.0.5"
p3<- DimPlot(PDE5, pt.size = 0.5, label = F, group.by = "integrated_snn_res.0.5", label.size = 4) +theme(axis.title.y = element_blank()) +labs(x = "Cell Types") 
p4 <- DimPlot(PDE5, pt.size = 0.5, label = F, group.by = "Cell.Culture", label.size = 4) +theme(axis.title.y = element_blank()) +labs(x = "Cell Culture Conditions")
plot_grid(p1,p2,p3,p4)
```

I missed this bit. You have to change the default assay to RNA before doing the differential gene expression testing. My fault.....

```{r}
#Idents(PDE5) <- "integrated_snn_res.0.5"
#DefaultAssay(PDE5) <- "RNA"
#PDE5.Conserved.Markers <- FindConservedMarkers(PDE5, ident.1 = c(7), grouping.var = "stim")
```


```{r}
Idents(PDE5) <- "integrated_snn_res.0.5"
Co.culture.CAFs <- WhichCells(PDE5, idents = "5")
MyoCAFs <- subset(PDE5, cells = Co.culture.CAFs)

Idents(MyoCAFs) <- "stim"
Stim.Markers <- FindAllMarkers(MyoCAFs, only.pos = T)
#Stim.Markers.MAST <- FindAllMarkers(MyoCAFs, test.use = "MAST")
#Stim.Markers.DESeq2 <- FindAllMarkers(MyoCAFs, only.pos = T, test.use = "DESeq2")
```

```{r}
MyoCAFs@assays
```
Change features here to look at different gene expression levels.
```{r}
features <- "ACTA2"

v1<- VlnPlot(MyoCAFs, 
        features = features,
        assay = "RNA", 
        pt.size = 0.2, 
        group.by = "stim",
        ) + labs(x = "") + labs(title = paste0("Raw Data", "\n", features)) + 
  NoLegend()+ 
  geom_boxplot(width =0.2, fill = "grey", alpha = 0.5)
v2 <- VlnPlot(MyoCAFs, 
        features = features,
        assay = "SCT",
        pt.size = 0.2, 
        group.by = "stim",
        )  + labs(x = "") + labs(title = paste0("SC Transform", "\n", features))+ 
  NoLegend()+ 
  geom_boxplot(width =0.2, fill = "grey", alpha = 0.5)

v3<- VlnPlot(MyoCAFs, 
        features = features,
        assay = "integrated", 
        pt.size = 0.2, 
        group.by = "stim",
        ) + labs(x = "") + labs(title = paste0("Integrated", "\n", features)) + 
  NoLegend()+ 
  geom_boxplot(width =0.2, fill = "grey", alpha = 0.5)
plot_grid(v1,v2,v3)
```
```{r}
cGMP.pathways <- read.table(file = "~/Desktop/R/GeneSets/cGMP.pathways.txt", sep = "\t", blank.lines.skip = TRUE, header = T)
#vector of hallmark gene set names
path_names <- colnames(cGMP.pathways)
# test version
#sig_names_short <- sig_names[1:5]

for (i in path_names) {
  cd_features <- assign(i, as.vector(cGMP.pathways[i]))
  cd_features <- na.omit(cd_features)
  PDE5 <- AddModuleScore(
    object = PDE5,
    nbin=10,
    features = cd_features,
    ctrl = 100,
    name = i,
    search = TRUE)
  colnames(PDE5@meta.data) <-
    gsub(x = colnames(PDE5@meta.data)
         , pattern = paste0(i,1)
         , replacement = i
    )
}
colnames(PDE5@meta.data)


```
```{r}
features <- colnames(cGMP.pathways)
VlnPlot(MyoCAFs, features= features, split.by = "stim", pt.size = 0.2, multi.group = T)
```
```{r}
v1 <- VlnPlot(MyoCAFs, 
              features = "ACTA2",
              assay = "RNA",
              pt.size = 0.2, 
              sort=F,
)  + labs(x = "") + labs(title = paste0("ACTA2 \n Alpha Smooth Muscle Actin"))+ 
  theme(axis.title.x = element_text(face = "bold")) +
  NoLegend()+ 
  geom_boxplot(width =0.2, fill = "grey", alpha = 0.5) +
  stat_compare_means(label.x = 1.5, label.y = 150)

v2 <- VlnPlot(MyoCAFs, 
              features = "TAGLN",
              assay = "RNA",
              pt.size = 0.2, 
              sort=F,
)  + labs(x = "") + labs(title = paste0("TAGLN \n Transgelin"))+ 
  theme(axis.title.x = element_text(face = "bold")) +
  NoLegend()+ 
  geom_boxplot(width =0.2, fill = "grey", alpha = 0.5) +
  stat_compare_means(label.x = 1.5, label.y = 150)
v3 <- VlnPlot(MyoCAFs, 
              features = "TGFBI",
              assay = "RNA",
              pt.size = 0.2, 
              sort=F,
)  + labs(x = "") + labs(title = paste0("TGFBI \n Transforming Growth Factor Beta induced"))+ 
  theme(axis.title.x = element_text(face = "bold")) +
  NoLegend()+ 
  geom_boxplot(width =0.2, fill = "grey", alpha = 0.5) +
  stat_compare_means(label.x = 1.5, label.y = 150)

v4 <- VlnPlot(MyoCAFs, 
              features = "CALD1",
              assay = "RNA",
              pt.size = 0.2, 
              sort=F,
)  + labs(x = "") + labs(title = paste0("CALD1 \n Caldesmon 1"))+ 
  theme(axis.title.x = element_text(face = "bold")) +
  NoLegend()+ 
  geom_boxplot(width =0.2, fill = "grey", alpha = 0.5) +
  stat_compare_means(label.x = 1.5, label.y = 150)

fig <- plot_grid(v1, v2, v3, v4)
fig
```

```{r}
colnames(PDE5@meta.data)
Idents(PDE5) <- "Cell.Culture"
Mono.Fibs <- subset(PDE5, idents = "Fibroblasts")
Idents(Mono.Fibs) <- "stim"
Stim.Markers.Mono.Fibs <- FindAllMarkers(Mono.Fibs, only.pos = T)
write.csv(Stim.Markers.Mono.Fibs, file = "~/Desktop/Mono.Fibs.csv")

VlnPlot(PDE5, 
        features = "ACTA2",
        assay = "RNA",
        pt.size = 0.2,
        sort=F,
        split.by = "stim",
        )  + 
  labs(x = "") + 
  labs(title = paste0("ACTA2 \n Alpha Smooth Muscle Actin"))+ 
  theme(axis.title.x = element_text(face = "bold")) +
  NoLegend()+ 
  geom_boxplot(width =0.2, fill = "grey", alpha = 0.5) +
  stat_compare_means(label.x = 1.5, label.y = 150)

```

```{r}
v1 <- VlnPlot(Mono.Fibs, 
              features = "ACTA2",
              assay = "RNA",
              pt.size = 0.2, 
              sort=F,
)  + labs(x = "") + labs(title = paste0("ACTA2 \n Alpha Smooth Muscle Actin"))+ 
  theme(axis.title.x = element_text(face = "bold")) +
  NoLegend()+ 
  geom_boxplot(width =0.2, fill = "grey", alpha = 0.5) +
  stat_compare_means(label.x = 1.5, label.y = 150)

v2 <- VlnPlot(Mono.Fibs, 
              features = "TAGLN",
              assay = "RNA",
              pt.size = 0.2, 
              sort=F,
)  + labs(x = "") + labs(title = paste0("TAGLN \n Transgelin"))+ 
  theme(axis.title.x = element_text(face = "bold")) +
  NoLegend()+ 
  geom_boxplot(width =0.2, fill = "grey", alpha = 0.5) +
  stat_compare_means(label.x = 1.5, label.y = 150)
v3 <- VlnPlot(Mono.Fibs, 
              features = "TGFBI",
              assay = "RNA",
              pt.size = 0.2, 
              sort=F,
)  + labs(x = "") + labs(title = paste0("TGFBI \n Transforming Growth Factor Beta induced"))+ 
  theme(axis.title.x = element_text(face = "bold")) +
  NoLegend()+ 
  geom_boxplot(width =0.2, fill = "grey", alpha = 0.5) +
  stat_compare_means(label.x = 1.5, label.y = 150)

v4 <- VlnPlot(Mono.Fibs, 
              features = "CALD1",
              assay = "RNA",
              pt.size = 0.2, 
              sort=F,
)  + labs(x = "") + labs(title = paste0("CALD1 \n Caldesmon 1"))+ 
  theme(axis.title.x = element_text(face = "bold")) +
  NoLegend()+ 
  geom_boxplot(width =0.2, fill = "grey", alpha = 0.5) +
  stat_compare_means(label.x = 1.5, label.y = 150)

fig2 <- plot_grid(v1, v2, v3, v4)
fig2
```
```{r}
plot_grid(fig, fig2, labels = c("Co-Culture", "Mono-Culture"))
```


```{r}
Idents(PDE5) <- "orig.ident"
levels(PDE5)
PDE5 <- RenameIdents(PDE5, "CAF"="CAFs.Mono.PDE5", "CAFv" ="CAFs.Mono.Control", "MFD1v" = "MFD1s.Mono.Control", "MFD1" ="MFD1s.Mono.PDE5")
table(PDE5@active.ident)
```

```{r}
MyoMFD1s <- subset(PDE5, idents=c("MC", "MCv"))
Idents(MyoMFD1s) <- "Cell.Types"
MyoMFD1s <- subset(MyoMFD1s, idents=c("Cancer"))
levels(MyoMFD1s)
Idents(MyoMFD1s) <- "stim"
```

```{r}
CAFs.Co.Control <- WhichCells(MyoCAFs, idents = "Control")
PDE5 <- SetIdent(PDE5, cells = CAFs.Co.Control, value = "CAFs.Co.Control")
CAFs.Co.PDE5 <- WhichCells(MyoCAFs, idents = "PDE5i")
PDE5 <- SetIdent(PDE5, cells = CAFs.Co.PDE5, value = "CAFs.Co.PDE5")
MFD1s.Co.Control <- WhichCells(MyoMFD1s, idents = "Control")
PDE5 <- SetIdent(PDE5, cells = MFD1s.Co.Control, value = "MFD1s.Co.Control")
MFD1s.Co.PDE5 <- WhichCells(MyoMFD1s, idents = "PDE5i")
PDE5 <- SetIdent(PDE5, cells = MFD1s.Co.PDE5, value = "MFD1s.Co.PDE5")

rm(CAFs.Co.Control, CAFs.Co.PDE5, MFD1s.Co.Control, MFD1s.Co.PDE5)
```

```{r}
levels(PDE5)
```

```{r}
PDE5.old <- PDE5
PDE5 <- subset(PDE5, idents = "MC", invert = T)

Final.Markers <- FindAllMarkers(PDE5, only.pos = T)
PDE5 <- ScaleData(PDE5)
top10 <- Final.Markers %>% group_by(cluster) %>% top_n(n = 50, wt = avg_logFC)
my_levels <- c("MFD1s.Mono.Control","MFD1s.Mono.PDE5","MFD1s.Co.Control","MFD1s.Co.PDE5","CAFs.Mono.Control","CAFs.Mono.PDE5","CAFs.Co.Control","CAFs.Co.PDE5")
PDE5@active.ident <- factor(x = PDE5@active.ident, levels = my_levels)
DoHeatmap(subset(PDE5, downsample= 50), features = top10$gene)
```

```{r}
PDE5[["Cell.States"]] <- Idents(PDE5)
Idents(PDE5) <- "Cell.Types"
MFD1s <- subset(PDE5, idents = "Cancer")
CAFs <- subset(PDE5, idents = "Fibroblasts")
Idents(CAFs) <- "Cell.States"
CAFs.Markers <- FindAllMarkers(CAFs, only.pos = T)
top10 <- CAFs.Markers %>% group_by(cluster) %>% top_n(n = 50, wt = avg_logFC)
DoHeatmap(subset(CAFs, downsample= 50), features = top10$gene)+scale_fill_gradientn(colors = rev(c("#67001f","#b2182b","#d6604d","#f4a582","#fddbc7","#d1e5f0","#92c5de","#4393c3","#2166ac","#053061")))
```

```{r}
DoHeatmap(subset(CAFs, downsample= 50), features = c("ACTA2", "TAGLN", "TGFBI", "CALD1", "IGFBP3"))
```

```{r}
Idents(PDE5) <- "Cell.States"
table(PDE5@active.ident)
```


```{r}
my_levels <- c("CAFs.Mono.Control","CAFs.Mono.PDE5","CAFs.Co.Control","CAFs.Co.PDE5")
CAFs@active.ident <- factor(x = CAFs@active.ident, levels = my_levels)

```

```{r}
comparisons = list(c("CAFs.Mono.Control","CAFs.Mono.PDE5"), c("CAFs.Co.Control","CAFs.Co.PDE5"), c("CAFs.Mono.Control", "CAFs.Co.Control"))
v1 <- VlnPlot(CAFs, 
        features = "ACTA2",
        assay = "RNA",
        pt.size = 0, 
        sort=F,
        log = T,
)  + labs(x = "", y = "") + labs(title = paste0("ACTA2"))+ 
  theme(plot.title = element_text(face = "bold.italic"), axis.title.x = element_text(face = "bold"), text = element_text(size = 25), axis.text = element_text(size = 20)) +
  NoLegend()+ 
  stat_compare_means(comparisons = comparisons, label.y = c(410, 350, 475), label = "p.signif") + 
  #stat_compare_means(label.x = 2.5, label.y = 475, size = 5) +
  geom_boxplot(width=0.1, fill = "grey", alpha = 0.5, outlier.size = 0) + 
  ylim(0,500)
v1
```

```{r}
v2 <- VlnPlot(CAFs, 
        features = "TAGLN",
        assay = "RNA",
        pt.size = 0, 
        sort=F,
        log = T,
)  + labs(x = "", y = "") + labs(title = paste0("TAGLN"))+ 
  theme(plot.title = element_text(face = "bold.italic"), axis.title.x = element_text(face = "bold"), text = element_text(size = 25), axis.text = element_text(size = 20)) +
  NoLegend()+ 
  stat_compare_means(comparisons = comparisons, label.y = c(410, 350, 475), label = "p.signif") + 
  #stat_compare_means(label.x = 2.5, label.y = 475, size = 5) +
  geom_boxplot(width=0.1, fill = "grey", alpha = 0.5, outlier.size = 0) + 
  ylim(0,500)
v2
```


```{r}
v3 <- VlnPlot(CAFs, 
        features = "TGFBI",
        assay = "RNA",
        pt.size = 0, 
        sort=F,
        log = T,
)  + labs(x = "", y = "") + labs(title = paste0("TGFBI"))+ 
  theme(plot.title = element_text(face = "bold.italic"), axis.title.x = element_text(face = "bold"), text = element_text(size = 25), axis.text = element_text(size = 20)) +
  NoLegend()+ 
  stat_compare_means(comparisons = comparisons, label.y = c(100, 200, 250), label = "p.signif") + 
  #stat_compare_means(label.x = 2.5, label.y = 475, size = 5) +
  geom_boxplot(width=0.1, fill = "grey", alpha = 0.5, outlier.size = 0) + 
  ylim(0,275)
v3
```

```{r}
v4 <- VlnPlot(CAFs, 
        features = "CALD1",
        assay = "RNA",
        pt.size = 0, 
        sort=F,
        log = T,
)  + labs(x = "", y = "") + labs(title = paste0("CALD1"))+ 
  theme(plot.title = element_text(face = "bold.italic"), axis.title.x = element_text(face = "bold"), text = element_text(size = 25), axis.text = element_text(size = 20)) +
  NoLegend()+ 
  stat_compare_means(comparisons = comparisons, label.y = c(410, 350, 475), label = "p.signif") + 
  #stat_compare_means(label.x = 2.5, label.y = 475, size = 5) +
  geom_boxplot(width=0.1, fill = "grey", alpha = 0.5, outlier.size = 0) + 
  ylim(0,500)
v4
```


```{r}
v5 <- VlnPlot(CAFs, 
        features = "IGFBP3",
        assay = "RNA",
        pt.size = 0, 
        sort=F,
        log = T,
)  + labs(x = "", y = "") + labs(title = paste0("IGFBP3"))+ 
  theme(plot.title = element_text(face = "bold.italic"), axis.title.x = element_text(face = "bold"), text = element_text(size = 25), axis.text = element_text(size = 20)) +
  NoLegend()+ 
  stat_compare_means(comparisons = comparisons, label.y = c(41, 55, 60), label = "p.signif") + 
  #stat_compare_means(label.x = 2.5, label.y = 475, size = 5) +
  geom_boxplot(width=0.1, fill = "grey", alpha = 0.5, outlier.size = 0) + 
  ylim(0,60)
v5
```
```{r}
v6 <- VlnPlot(CAFs, 
        features = "HLA-B",
        assay = "RNA",
        pt.size = 0, 
        sort=F,
        log = T,
)  + labs(x = "", y = "") + labs(title = paste0("HLA-B"))+ 
  theme(plot.title = element_text(face = "bold.italic"), axis.title.x = element_text(face = "bold"), text = element_text(size = 25), axis.text = element_text(size = 20)) +
  NoLegend()+ 
  stat_compare_means(comparisons = comparisons, label.y = c(140, 155, 200), label = "p.signif") + 
  #stat_compare_means(label.x = 2.5, label.y = 475, size = 5) +
  geom_boxplot(width=0.1, fill = "grey", alpha = 0.5, outlier.size = 0) + 
  ylim(0,200)
v6
```

```{r}
v7 <- VlnPlot(CAFs, 
        features = "TPM1",
        assay = "RNA",
        pt.size = 0, 
        sort=F,
        log = T,
)  + labs(x = "", y = "") + labs(title = paste0("TPM1"))+ 
  theme(plot.title = element_text(face = "bold.italic"), axis.title.x = element_text(face = "bold"), text = element_text(size = 25), axis.text = element_text(size = 20)) +
  NoLegend()+ 
  stat_compare_means(comparisons = comparisons, label.y = c(300, 400, 425), label = "p.signif") + 
  #stat_compare_means(label.x = 2.5, label.y = 475, size = 5) +
  geom_boxplot(width=0.1, fill = "grey", alpha = 0.5, outlier.size = 0) + 
  ylim(0,450)
v7
```

```{r}
v8 <- VlnPlot(CAFs, 
        features = "TPM2",
        assay = "RNA",
        pt.size = 0, 
        sort=F,
        log = T,
)  + labs(x = "", y = "") + labs(title = paste0("TPM2"))+ 
  theme(plot.title = element_text(face = "bold.italic"), axis.title.x = element_text(face = "bold"), text = element_text(size = 25), axis.text = element_text(size = 20)) +
  NoLegend()+ 
  stat_compare_means(comparisons = comparisons, label.y = c(350, 350, 400), label = "p.signif") + 
  #stat_compare_means(label.x = 2.5, label.y = 475, size = 5) +
  geom_boxplot(width=0.1, fill = "grey", alpha = 0.5, outlier.size = 0) + 
  ylim(0,425)
v8
```

```{r}
plot_grid(v1,v2,v3,v4, v5, v6, v7, v8)
```
```{r}
colnames(MyoCAFs@meta.data)
Idents(MyoCAFs) <- "stim"
MyoCAFs.Markers <- FindAllMarkers(MyoCAFs, only.pos = T, logfc.threshold = 0.2)
Top50 <- MyoCAFs.Markers %>% top_n(n = 150, wt = avg_logFC)
Heat1 <- DoHeatmap(MyoCAFs, features =Top50$gene, size = 3, 
          draw.lines = F) +theme(text = element_text(size = 12,face="bold.italic"))+scale_fill_gradientn(colors = rev(c("#67001f","#b2182b","#d6604d","#f4a582","#fddbc7","#d1e5f0","#92c5de","#4393c3","#2166ac","#053061")))
Heat1

```

```{r warning=F, message=F}

MyoMon <- CAFs
MyoMon <- as.CellDataSet(MyoMon, reduction = "umap")
MyoMon  
```


```{r}
MyoMon <- estimateSizeFactors(MyoMon)
MyoMon <- estimateDispersions(MyoMon)
MyoMon <- detectGenes(MyoMon, min_expr = 0.1)
print(head(fData(MyoMon)))

```

```{r}
expressed_genes <- row.names(subset(fData(MyoMon), num_cells_expressed >= 5))
```

```{r}
head(pData(MyoMon))
pData(MyoMon)$Total_mRNAs <- Matrix::colSums(exprs(MyoMon))
upper_bound <- 10^(mean(log10(pData(MyoMon)$Total_mRNAs)) +
                     2*sd(log10(pData(MyoMon)$Total_mRNAs)))
lower_bound <- 10^(mean(log10(pData(MyoMon)$Total_mRNAs)) -
                     2*sd(log10(pData(MyoMon)$Total_mRNAs)))
```

```{r}
qplot(Total_mRNAs, data = pData(MyoMon), color = stim, geom =
        "density") +
  geom_vline(xintercept = lower_bound) +
  geom_vline(xintercept = upper_bound)
```


```{r}
pData(MyoMon)$CellType <- pData(MyoMon)$stim
pie <- ggplot(pData(MyoMon), aes(x = factor(1), fill = factor(CellType))) + geom_bar(width = 1)
pie + coord_polar(theta = "y") + theme(axis.title.x = element_blank(), axis.title.y = element_blank())
```
From Monocle:
We recommend users order cells using genes selected by an unsupervised procedure called "dpFeature".
To use dpFeature, we first select superset of feature genes as genes expressed in at least 5% of all the cells.
```{r}
MyoMon <- detectGenes(MyoMon, min_expr = 0.1)
fData(MyoMon)$use_for_ordering <-
    fData(MyoMon)$num_cells_expressed > 0.05 * ncol(MyoMon)
```
Then we will perform a PCA analysis to identify the variance explained by each PC (principal component). We can look at a scree plot and determine how many pca dimensions are wanted based on whether or not there is a significant gap between that component and the component after it. By selecting only the high loading PCs, we effectively only focus on the more interesting biological variations.
```{r}
plot_pc_variance_explained(MyoMon, return_all = F)
```
We will then run reduceDimension with t-SNE as the reduction method on those top PCs and project them further down to two dimensions. 
I took 10 PCs because that is what I have used in the analysis in Seurat 

```{r}
MyoMon <- reduceDimension(MyoMon,
                              max_components = 2,
                              norm_method = 'log',
                              num_dim = 10,
                              reduction_method = 'tSNE',
                              verbose = T)

```
Then we can run density peak clustering to identify the clusters on the 2-D t-SNE space. The densityPeak algorithm clusters cells based on each cell's local density (Ρ) and the nearest distance (Δ) of a cell to another cell with higher distance. We can set a threshold for the Ρ, Δ and define any cell with a higher local density and distance than the thresholds as the density peaks. Those peaks are then used to define the clusters for all cells. By default, clusterCells choose 95% of Ρ and Δ to define the thresholds. We can also set a number of clusters (n) we want to cluster. In this setting, we will find the top n cells with high Δ with Δ among the top 50% range. The default setting often gives good clustering.
```{r}
MyoMon <- clusterCells(MyoMon, verbose = F)
```

```{r}
plot_cell_clusters(MyoMon, color_by = 'as.factor(Cluster)')
```

```{r}
plot_cell_clusters(MyoMon, color_by = 'as.factor(stim)')
```
We also provide the decision plot for users to check the Ρ, Δ for each cell and decide the threshold for defining the cell clusters.
```{r}
plot_rho_delta(MyoMon, rho_threshold = 10, delta_threshold = 4 )
```
We could then re-run clustering based on the user defined threshold. To facilitate the computation, we can set (skip_rho_sigma = T) which enables us to skip the calculation of the Ρ, Σ.
```{r}
MyoMon <- clusterCells(MyoMon,
                 rho_threshold = 2,
                 delta_threshold = 4,
                 skip_rho_sigma = T,
                 verbose = F)
```

```{r}
plot_cell_clusters(MyoMon, color_by = 'as.factor(Cluster)')
```

```{r}
plot_cell_clusters(MyoMon, color_by = 'as.factor(stim)')
```
After we confirm the clustering makes sense, we can then perform differential gene expression test as a way to extract the genes that distinguish them.
```{r}
clustering_DEG_genes <-
    differentialGeneTest(MyoMon[expressed_genes,],
          fullModelFormulaStr = '~stim')
```
We will then select the top 1000 significant genes as the ordering genes.
```{r}
MyoMon_ordering_genes <-
    row.names(clustering_DEG_genes)[order(clustering_DEG_genes$qval)][1:1000]

MyoMon <- setOrderingFilter(MyoMon, ordering_genes = MyoMon_ordering_genes)

MyoMon <- reduceDimension(MyoMon, method = 'DDRTree')

MyoMon <- orderCells(MyoMon)

plot_cell_trajectory(MyoMon, color_by = "stim", show_tree = T, show_branch_points = T)
```

```{r}
plot_cell_trajectory(MyoMon,cell_size = 0.5, color_by = "stim", show_tree = T, show_branch_points = T, show_state_number = T, theta = 180) + labs(title = "CAF Subtypes",x = "Pseudotime", y = "") + theme(axis.text = element_blank(), axis.ticks = element_blank()) + NoLegend()
```

```{r}
plot_cell_trajectory(MyoMon, color_by = "Pseudotime", show_tree = T, show_branch_points = F, theta = 180)+ scale_color_viridis(option = "C")+ labs(title = "Monocle Pseudotime",x = "Monocle", y = "") + theme(legend.position = "top", legend.title = element_blank(), axis.text = element_blank(), axis.ticks = element_blank()) 
```


```{r}
# cluster the top 50 genes that vary as a function of pseudotime

clustering_DEG_genes %>% arrange(qval) %>% head(200) %>% select(gene_short_name) -> gene_to_cluster
gene_to_cluster <- gene_to_cluster$gene

my_pseudotime_cluster <- plot_pseudotime_heatmap(MyoMon[gene_to_cluster,], num_clusters = 5,cores = 4,show_rownames = TRUE,return_heatmap = TRUE, add_annotation_col = )

plot_pheatmap(my_pseudotime_cluster) # Works in Console only

```

```{r}
# cluster the top 50 genes that vary as a function of pseudotime

gene_to_cluster2 <- c( "PDE5A", "GMPR2", "GMPR", "PRKACB", "PRKACA", "ADCY3")


my_pseudotime_cluster2 <- plot_pseudotime_heatmap(MyoMon[gene_to_cluster2,], num_clusters = 5,cores = 4,show_rownames = TRUE,return_heatmap = TRUE, )

plot_pheatmap(my_pseudotime_cluster2)

```

```{r}
pseudotime_monocle <-
    data.frame(
        Cell.type = phenoData(MyoMon)$stim,
        pseudotime = phenoData(MyoMon)$Pseudotime,
        Class = phenoData(MyoMon)$Cell.Culture,
        States = phenoData(MyoMon)$Cell.States
    )

ggplot(pseudotime_monocle, 
       aes(x = pseudotime, 
           y = States, colour = Cell.type))+
    geom_quasirandom(groupOnX = F) + theme_classic() +
    xlab("monocle pseudotime") + ylab("Cell types") +
    ggtitle("Cells ordered by monocle pseudotime")

ggplot(pseudotime_monocle)+ 
  geom_density(aes(x = pseudotime,fill = Cell.type,alpha =0.3,color=Cell.type))+
  theme_classic() +
    xlab("monocle pseudotime") + ylab("Cell types") +
    ggtitle("Cells ordered by monocle pseudotime")

Hot2 <- ggplot(pseudotime_monocle) +
  geom_jitter(aes(pseudotime,Class, color = Cell.type),height = 0.04, size = 1.5) + theme_classic()+ xlab("monocle pseudotime")
Hot2

```

```{r}


plot_cell_trajectory(MyoMon,cell_size = 0.5, color_by = "stim", show_tree = T, show_branch_points = F, show_state_number = F) + labs(title = "CAF Subtypes",x = "Pseudotime", y = "") + theme(axis.text = element_blank(), axis.ticks = element_blank()) + NoLegend() + facet_wrap(~Cell.States, nrow = 5)

ggplot(pseudotime_monocle) +
  geom_jitter(aes(pseudotime,Class, color = Cell.type),height = 0.04, size = 2) + theme_classic()+ xlab("monocle pseudotime")

plot_cell_trajectory(MyoMon,cell_size = 2, color_by = "stim", show_tree = T, show_branch_points = T, show_state_number = F) + labs(title = "CAF Subtypes",x = "Pseudotime", y = "") + theme(axis.text = element_blank(), axis.ticks = element_blank()) + NoLegend() 

```

```{r}
BEAM_res <- BEAM(MyoMon, branch_point =1, cores = 4, )

BEAM_res <- BEAM_res[order(BEAM_res$qval),]

BEAM_res.filtered <- BEAM_res[,c("gene_short_name", "pval", "qval")]
```

```{r}
MyoMon.Beam <- MyoMon[row.names(subset(BEAM_res.filtered,qval < 5e-2)),]
```

```{r}
#To store the pheatmap object
Plot2 <- plot_pheatmap(plot_genes_branched_heatmap(MyoMon.Beam,branch_point = 2,
                            num_clusters = 5,
                            cores = 4,
                            use_gene_short_name = TRUE,
                            show_rownames = F,
                            return_heatmap = TRUE,
                            #branch_labels = c("MyoCAFs", "ColCAFs"), 
                            branch_colors = c("darkgrey", "#00B0F6", "#E76BF3")))


clusters <- Plot2$annotation_row


```
We can then save the data for further analysis.