STOFAD R code
Download: https://bmbl.bmi.osumc.edu/downloadFiles/stofad/script/mannual_label_for_customized_intersectoin_spot.R
setwd("d:/my_analysis/ChenShuo/2nd_sequencing/new_data_with_multi_channel/")
library(Seurat)
library(Spaniel)
library(ggplot2)
library(patchwork)
library(dplyr)
require(stats)
options(future.globals.maxSize = 4000 * 1024^2)
# read control 2-5
wd <- "./2-5/"
my.2_5.object<- Load10X_Spatial(data.dir =wd ,filename = "filtered_feature_bc_matrix.h5")
my.2_5.object$patient <- "control"
my.2_5.object$patientID <- "2_5"
my.2_5.object<- SCTransform(my.2_5.object, assay = "Spatial", verbose = FALSE)
my.2_5.object<- RunPCA(my.2_5.object, assay = "SCT", verbose = FALSE)
my.2_5.object<- FindNeighbors(my.2_5.object, reduction = "pca", dims = 1:10,k.param = 10)
my.2_5.object<- FindClusters(my.2_5.object, verbose = FALSE,resolution = 0.4)
my.2_5.object<- RunUMAP(my.2_5.object, reduction = "pca", dims = 1:10)
dim(my.2_5.object)
DimPlot(my.2_5.object, reduction = "umap", label = TRUE,pt.size = 2,label.size = 8)
# add layer label
my.2_5.label <- read.csv("New_layer_annotation/2-5 Layer.csv",header = T,row.names = 1)
identical(colnames(my.2_5.object),rownames(my.2_5.label))
my.2_5.label$Layer[my.2_5.label$Layer == ""] <- "Noise"
# my.2_5.label <- my.2_5.label[match(colnames(my.2_5.object),rownames(my.2_5.label)),]
my.2_5.object <- AddMetaData(my.2_5.object,metadata = my.2_5.label)
# add other label
# add Tau
Tau.2_5.label <- read.csv("New staining annotation/Tau46_TauC/2-5 Tau.csv",header = T,row.names = 1)
Tau.2_5.label$Tau46.TauC[Tau.2_5.label$Tau46.TauC == ""] <- "Not_defined"
unique(Tau.2_5.label$Tau46.TauC)
# add WFS1
WFS1.2_5.label <- read.csv("New staining annotation/WFS1/2-5 WFS1.csv",header = T,row.names = 1)
WFS1.2_5.label$WFS1[WFS1.2_5.label$WFS1==""] <- "Not_defined"
unique(WFS1.2_5.label$WFS1)
# add label to object
my.2_5.object <- AddMetaData(my.2_5.object,metadata = Tau.2_5.label)
my.2_5.object <- AddMetaData(my.2_5.object,metadata = WFS1.2_5.label)
colnames(my.2_5.object@meta.data)
SpatialDimPlot(my.2_5.object, label = TRUE, label.size = 11,alpha = c(0,0.1))
my.2_5.object$number_gene <- as.numeric(my.2_5.object$nFeature_Spatial)
SpatialDimPlot(my.2_5.object, label = TRUE, label.size = 11,alpha = c(0,0.1),group.by = "number_gene")
SpatialDimPlot(my.2_5.object,group.by = "Layer", label = TRUE, label.size = 11,
cols = c("Layer 1" = "#2E91E5",
"Layer 2"= "#E15F99",
"Layer 3"= "#1CA71C",
"Layer 4"= "#FB0D0D",
"Layer 5"= "#DA16FF",
"Layer 6"= "#B68100",
"White Matter"= "#222A2A",
"Noise"= "#750D86"
))
#read patient 2-8
wd <- "./2-8/"
my.2_8.object<- Load10X_Spatial(data.dir =wd ,filename = "filtered_feature_bc_matrix.h5")
my.2_8.object$patient <- "AD"
my.2_8.object$patientID <- "2_8"
my.2_8.object<- SCTransform(my.2_8.object, assay = "Spatial", verbose = FALSE)
my.2_8.object<- RunPCA(my.2_8.object, assay = "SCT", verbose = FALSE)
my.2_8.object<- FindNeighbors(my.2_8.object, reduction = "pca", dims = 1:10,k.param = 10)
my.2_8.object<- FindClusters(my.2_8.object, verbose = FALSE,resolution = 0.4)
my.2_8.object<- RunUMAP(my.2_8.object, reduction = "pca", dims = 1:10)
dim(my.2_8.object)
DimPlot(my.2_8.object, reduction = "umap", label = TRUE,pt.size = 2,label.size = 8)
my.2_8.label <- read.csv("New_layer_annotation/2-8 Layer.csv",header = T,row.names = 1)
identical(colnames(my.2_8.object),rownames(my.2_8.label))
my.2_8.label$Layer[my.2_8.label$Layer == ""] <- "Noise"
# my.2_8.label <- my.2_8.label[match(colnames(my.2_8.object),rownames(my.2_8.label)),]
# add other label
# add Tau
Tau.2_8.label <- read.csv("New staining annotation/Tau46_TauC/2-8 Tau.csv",header = T,row.names = 1)
Tau.2_8.label$Tau46.TauC[Tau.2_8.label$Tau46.TauC == ""] <- "Not_defined"
unique(Tau.2_8.label$Tau46.TauC)
# add WFS1
WFS1.2_8.label <- read.csv("New staining annotation/WFS1/2-8 WFS1.csv",header = T,row.names = 1)
WFS1.2_8.label$WFS1[WFS1.2_8.label$WFS1 == ""] <- "Not_defined"
unique(WFS1.2_8.label$WFS1)
# add Abeta
Abeta.2_8.label <- read.csv("New staining annotation/Ab/2-8 Ab.csv",header = T,row.names = 1)
Abeta.2_8.label$Ab[Abeta.2_8.label$Ab ==""] <- "Not_defined"
Abeta.2_8.label$Ab[Abeta.2_8.label$Ab =="Ab "] <- "Ab"
unique(Abeta.2_8.label$Ab)
# add AT8
AT8.2_8.label <- read.csv("New staining annotation/AT8/2-8 AT8 .csv",header = T,row.names = 1)
AT8.2_8.label$AT8[AT8.2_8.label$AT8 ==""] <- "Not_defined"
unique(AT8.2_8.label$AT8)
# add meta to object
my.2_8.object <- AddMetaData(my.2_8.object, metadata =Tau.2_8.label)
my.2_8.object <- AddMetaData(my.2_8.object, metadata =WFS1.2_8.label)
my.2_8.object <- AddMetaData(my.2_8.object, metadata =Abeta.2_8.label)
my.2_8.object <- AddMetaData(my.2_8.object, metadata =AT8.2_8.label)
my.2_8.object <- AddMetaData(my.2_8.object,metadata = my.2_8.label)
my.2_8.object$Layer[which(my.2_8.object$Layer == "White matter")] <- "White Matter"
SpatialDimPlot(my.2_8.object, label = TRUE, label.size = 11,alpha = c(0,0.1))
SpatialDimPlot(my.2_8.object,group.by = "AT8", label = TRUE, label.size = 11,alpha = c(0,0.1))
SpatialDimPlot(my.2_8.object,group.by = "Layer", label = TRUE, label.size = 11,
cols = c("Layer 1" = "#2E91E5",
"Layer 2"= "#E15F99",
"Layer 3"= "#1CA71C",
"Layer 4"= "#FB0D0D",
"Layer 5"= "#DA16FF",
"Layer 6"= "#B68100",
"White Matter"= "#222A2A",
"Noise"= "#750D86"
))
# read control 18-64
wd <- "./18-64/"
my.18_64.object<- Load10X_Spatial(data.dir =wd ,filename = "filtered_feature_bc_matrix.h5")
my.18_64.object$patient <- "control"
my.18_64.object$patientID <- "18_64"
my.18_64.object<- SCTransform(my.18_64.object, assay = "Spatial", verbose = FALSE)
my.18_64.object<- RunPCA(my.18_64.object, assay = "SCT", verbose = FALSE)
my.18_64.object<- FindNeighbors(my.18_64.object, reduction = "pca", dims = 1:10,k.param = 10)
my.18_64.object<- FindClusters(my.18_64.object, verbose = FALSE,resolution = 0.4)
my.18_64.object<- RunUMAP(my.18_64.object, reduction = "pca", dims = 1:10)
dim(my.18_64.object)
DimPlot(my.18_64.object, reduction = "umap", label = TRUE,pt.size = 2,label.size = 8)
my.18_64.label <- read.csv("New_layer_annotation/18-64 Layer.csv",header = T,row.names = 1)
identical(colnames(my.18_64.object),rownames(my.18_64.label))
my.18_64.label$Layer[my.18_64.label$Layer == ""] <- "Noise"
# add other label
# add Tau
Tau.18_64.label <- read.csv("New staining annotation/Tau46_TauC/18-64 Tau.csv",header = T,row.names = 1)
Tau.18_64.label$Tau46.TauC[Tau.18_64.label$Tau46.TauC == ""] <- "Not_defined"
unique(Tau.18_64.label$Tau46.TauC)
# add WFS1
WFS1.18_64.label <- read.csv("New staining annotation/WFS1/18-64 WFS1.csv",header = T,row.names = 1)
WFS1.18_64.label$WFS1[WFS1.18_64.label$WFS1==""] <- "Not_defined"
unique(WFS1.18_64.label$WFS1)
# add label to object
my.18_64.object <- AddMetaData(my.18_64.object,metadata = Tau.18_64.label)
my.18_64.object <- AddMetaData(my.18_64.object,metadata = WFS1.18_64.label)
my.18_64.object <- AddMetaData(my.18_64.object,metadata = my.18_64.label)
# my.18_64.object$Layer[which(my.18_64.object$Layer == "White matter")] <- "White Matter"
SpatialDimPlot(my.18_64.object, label = TRUE, label.size = 11,alpha = c(0,0.1))
SpatialDimPlot(my.18_64.object,group.by = "Layer", label = TRUE, label.size = 11,
cols = c("Layer 1" = "#2E91E5",
"Layer 2"= "#E15F99",
"Layer 3"= "#1CA71C",
"Layer 4"= "#FB0D0D",
"Layer 5"= "#DA16FF",
"Layer 6"= "#B68100",
"White Matter"= "#222A2A",
"Noise"= "#750D86"
))
# read patient T4857
wd <- "./T4857/"
my.T4857.object<- Load10X_Spatial(data.dir =wd ,filename = "filtered_feature_bc_matrix.h5")
my.T4857.object$patient <- "AD"
my.T4857.object$patientID <- "T4857"
my.T4857.object<- SCTransform(my.T4857.object, assay = "Spatial", verbose = FALSE)
my.T4857.object<- RunPCA(my.T4857.object, assay = "SCT", verbose = FALSE)
my.T4857.object<- FindNeighbors(my.T4857.object, reduction = "pca", dims = 1:10,k.param = 10)
my.T4857.object<- FindClusters(my.T4857.object, verbose = FALSE,resolution = 0.4)
my.T4857.object<- RunUMAP(my.T4857.object, reduction = "pca", dims = 1:10)
dim(my.T4857.object)
DimPlot(my.T4857.object, reduction = "umap", label = TRUE,pt.size = 2,label.size = 8)
my.T4857.label <- read.csv("New_layer_annotation/T4857 Layers.csv",header = T,row.names = 1)
identical(colnames(my.T4857.object),rownames(my.T4857.label))
my.T4857.label$Layer[my.T4857.label$Layer == ""] <- "Noise"
# my.T4857.label <- my.T4857.label[match(colnames(my.T4857.object),rownames(my.T4857.label)),]
my.T4857.object <- AddMetaData(my.T4857.object,metadata = my.T4857.label)
SpatialDimPlot(my.T4857.object, label = TRUE, label.size = 11,alpha = c(0,0.1))
# add other layer
# add Tau
Tau.T4857.label <- read.csv("New staining annotation/Tau46_TauC/T4857 Tau.csv",header = T,row.names = 1)
Tau.T4857.label$Tau46.TauC[Tau.T4857.label$Tau46.TauC == ""] <- "Not_defined"
unique(Tau.T4857.label$Tau46.TauC)
# add WFS1
WFS1.T4857.label <- read.csv("New staining annotation/WFS1/T4857 WFS1.csv",header = T,row.names = 1)
WFS1.T4857.label$WFS1[WFS1.T4857.label$WFS1 == ""] <- "Not_defined"
unique(WFS1.T4857.label$WFS1)
# add Abeta
Abeta.T4857.label <- read.csv("New staining annotation/Ab/T4857 Ab.csv",header = T,row.names = 1)
Abeta.T4857.label$Ab[Abeta.T4857.label$Ab ==""] <- "Not_defined"
unique(Abeta.T4857.label$Ab)
# add AT8
AT8.T4857.label <- read.csv("New staining annotation/AT8/2-8 AT8 .csv",header = T,row.names = 1)
AT8.T4857.label$AT8[AT8.T4857.label$AT8 ==""] <- "Not_defined"
unique(AT8.T4857.label$AT8)
# add regions
regions.T4857.label <- read.csv("New staining annotation/T4857 Region.csv",header = T,row.names = 1)
unique(regions.T4857.label$Region)
regions.T4857.label$Region[regions.T4857.label$Region ==""] <- "Not_defined"
# add meta to object
my.T4857.object <- AddMetaData(my.T4857.object, metadata =Tau.T4857.label)
my.T4857.object <- AddMetaData(my.T4857.object, metadata =WFS1.T4857.label)
my.T4857.object <- AddMetaData(my.T4857.object, metadata =Abeta.T4857.label)
my.T4857.object <- AddMetaData(my.T4857.object, metadata =AT8.T4857.label)
my.T4857.object <- AddMetaData(my.T4857.object,metadata = my.T4857.label)
my.T4857.object <- AddMetaData(my.T4857.object,metadata = regions.T4857.label)
my.T4857.object$Layer[which(my.T4857.object$Layer == "White matter")] <- "White Matter"
SpatialDimPlot(my.T4857.object,group.by = "Layer", label = TRUE, label.size = 11,
cols = c("Layer 1" = "#2E91E5",
"Layer 2"= "#E15F99",
"Layer 3"= "#1CA71C",
"Layer 4"= "#FB0D0D",
"Layer 5"= "#DA16FF",
"Layer 6"= "#B68100",
"White Matter"= "#222A2A",
"Noise"= "#750D86"
))
Idents(my.T4857.object) <- "Layer"
# DEGs comparison
# 1 vs 3: Tau region for each layer.
# 2-5 control1
# 18-64 control2
# 2-8 ad1
# T4857 ad2
require(openxlsx)
library(xlsx)
library(ComplexHeatmap)
`%notin%` <- Negate(`%in%`)
seurat.object.list <- list(C1 = my.2_5.object, C2 = my.18_64.object, AD1 = my.2_8.object, AD2 = my.T4857.object)
comparison_vec <- rbind(AD1_C1 = c(1,0,1,0),
AD2_C1 = c(1,0,0,1),
AD1_C2 = c(0,1,1,0),
AD2_C2 = c(0,1,0,1))
tmp.merge.object <- merge(seurat.object.list[[1]],y = c(seurat.object.list[[2]],seurat.object.list[[3]],seurat.object.list[[4]]))
tmp.layer.info <- unique(tmp.merge.object$Layer)
tmp.layer.info <- tmp.layer.info[c(-1,-8)]
list_of_datasets <- list()
for (j in seq_len(length(tmp.layer.info))){
DEG_list <- list()
cell_number <- list()
for (i in 1:4){
tmp.object.list.subset <- seurat.object.list[which(comparison_vec[i,]==1)]
tmp.merge.object <- merge(tmp.object.list.subset[[1]],tmp.object.list.subset[[2]])
tmp.layer <- tmp.layer.info[j]
layer.spot <- names(tmp.merge.object$Layer)[tmp.merge.object$Layer ==tmp.layer ]
Control.spots <- names(tmp.merge.object$patient)[tmp.merge.object$patient == "control"]
AD.spots <- names(tmp.merge.object$patient)[tmp.merge.object$patient == "AD"]
Tau.spots <- names(tmp.merge.object$Tau46.TauC)[tmp.merge.object$Tau46.TauC == "Tau"]
cell.group1 <- intersect(layer.spot,intersect(AD.spots,Tau.spots))
cell.group2 <- intersect(layer.spot,intersect(Control.spots, Tau.spots))
tmp.DEG_list <- FindMarkers(tmp.merge.object, ident.1 = cell.group1, ident.2 = cell.group2, logfc.threshold = 0.05 , min.pct = 0.25,min.cells.group = 1)
tmp.DEG_list <- tmp.DEG_list[abs(tmp.DEG_list$avg_logFC) > 0.25,]
DEG_list[[i]] <- tmp.DEG_list
cell_number[[i]] <- c(length(cell.group1),length(cell.group2))
cell.group1.name <- gsub("_[1,2,3,4,5,6]$","",cell.group1)
cell.group2.name <- gsub("_[1,2,3,4,5,6]$","",cell.group2)
AD.cell.name <- names(tmp.merge.object$patient)[ tmp.merge.object$patient == "AD"]
AD.cell.name <- gsub("_[1,2,3,4,5,6]$","",AD.cell.name )
control.cell.name <- names( tmp.merge.object$patient)[ tmp.merge.object$patient == "control"]
control.cell.name <- gsub("_[1,2,3,4,5,6]$","",control.cell.name )
AD_barcode <- as.data.frame(cbind(ID = AD.cell.name, barcode = "unselected"))
control_barcode <- as.data.frame(cbind(ID = control.cell.name,barcode = "unselected"))
vector.name <- unlist(strsplit(rownames(comparison_vec)[i],"_"))
AD_barcode$barcode[AD_barcode$ID %in% cell.group1.name] <- paste0(rownames(comparison_vec)[i],
"_layer_",tmp.layer.info[j],
"_",vector.name[1]
)
control_barcode$barcode[control_barcode$ID %in% cell.group2.name] <- paste0(rownames(comparison_vec)[i],
"_layer_",tmp.layer.info[j],
"_",vector.name[2]
)
write.csv(AD_barcode,file = paste0("./1_vs_3_spot_csv_for_loupe/",rownames(comparison_vec)[i],
"_layer_",tmp.layer.info[j],
"_",vector.name[1],".csv"), quote = F,row.names = F)
write.csv(control_barcode,file = paste0("./1_vs_3_spot_csv_for_loupe/",rownames(comparison_vec)[i],
"_layer_",tmp.layer.info[j],
"_",vector.name[2],".csv"),quote = F,row.names = F)
}
print(tmp.layer)
names(DEG_list) <- rownames(comparison_vec)
# up regulate gene
AD_up_gene_list <- sapply(DEG_list, function(x){
tmp.up_name <- rownames(x)[x$avg_logFC >0]
return(tmp.up_name)
})
AD_up_gene_df <- list_to_matrix(AD_up_gene_list)
AD_up_gene_lgfc_df <- AD_up_gene_df
colnames(AD_up_gene_lgfc_df) <- paste0(tmp.layer,"_up_logFC_", colnames(AD_up_gene_df),cell_number)
for (n in 1:ncol(AD_up_gene_df)){
for (m in 1:nrow(AD_up_gene_df)){
if(AD_up_gene_df[m,n] ==1){
tmp_AD_up_genename <- rownames(AD_up_gene_df)[m]
AD_up_gene_lgfc <- DEG_list[[n]][tmp_AD_up_genename,c("avg_logFC")]
AD_up_gene_lgfc_df[m,n] <- AD_up_gene_lgfc
}
}
}
AD_up_gene_pval_df <- AD_up_gene_df
colnames(AD_up_gene_pval_df) <- paste0(tmp.layer,"up_Adj.pval_", colnames(AD_up_gene_df),cell_number)
for (n in 1:ncol(AD_up_gene_df)){
for (m in 1:nrow(AD_up_gene_df)){
if(AD_up_gene_df[m,n] ==1){
tmp_AD_up_genename <- rownames(AD_up_gene_df)[m]
AD_up_gene_lgfc <- DEG_list[[n]][tmp_AD_up_genename,c("p_val_adj")]
AD_up_gene_pval_df[m,n] <- AD_up_gene_lgfc
}
}
}
# down regulated gene
AD_down_gene_list <- sapply(DEG_list, function(x){
tmp.down_name <- rownames(x)[x$avg_logFC <0]
return(tmp.down_name)
})
AD_down_gene_df <- list_to_matrix(AD_down_gene_list)
AD_down_gene_lgfc_df <- AD_down_gene_df
colnames(AD_down_gene_lgfc_df) <- paste0(tmp.layer,"down_logFC_", colnames(AD_down_gene_df),cell_number)
for (n in 1:ncol(AD_down_gene_df)){
for (m in 1:nrow(AD_down_gene_df)){
if(AD_down_gene_df[m,n] ==1){
tmp_AD_down_genename <- rownames(AD_down_gene_df)[m]
AD_down_gene_lgfc <- DEG_list[[n]][tmp_AD_down_genename,c("avg_logFC")]
AD_down_gene_lgfc_df[m,n] <- AD_down_gene_lgfc
}
}
}
AD_down_gene_pval_df <- AD_down_gene_df
colnames(AD_down_gene_pval_df) <- paste0(tmp.layer,"down_Adj.pval_", colnames(AD_down_gene_df),cell_number)
for (n in 1:ncol(AD_down_gene_df)){
for (m in 1:nrow(AD_down_gene_df)){
if(AD_down_gene_df[m,n] ==1){
tmp_AD_down_genename <- rownames(AD_down_gene_df)[m]
AD_down_gene_lgfc <- DEG_list[[n]][tmp_AD_down_genename,c("p_val_adj")]
AD_down_gene_pval_df[m,n] <- AD_down_gene_lgfc
}
}
}
final_AD_up_matrix <- cbind(ID= rownames(AD_up_gene_df),
overlap = rowSums(AD_up_gene_df),
AD_up_gene_df,
AD_up_gene_lgfc_df,
AD_up_gene_pval_df)
final_AD_down_matrix <- cbind(ID= rownames(AD_down_gene_df),
overlap = rowSums(AD_down_gene_df),
AD_down_gene_df,
AD_down_gene_lgfc_df,
AD_down_gene_pval_df)
# remove up and down gene in one comparison
gene_for_up_and_down <- intersect(rownames(final_AD_up_matrix),rownames(final_AD_down_matrix))
final_AD_up_matrix <- final_AD_up_matrix[rownames(final_AD_up_matrix) %notin%gene_for_up_and_down,]
final_AD_down_matrix <- final_AD_down_matrix[rownames(final_AD_down_matrix) %notin% gene_for_up_and_down,]
list_of_datasets[[2*j-1]] <- final_AD_up_matrix
names(list_of_datasets)[2*j-1] <- paste0(tmp.layer, "_AD_up_DEG")
list_of_datasets[[2*j]] <- final_AD_down_matrix
names(list_of_datasets)[2*j] <- paste0(tmp.layer, "_AD_down_DEG")
}
library(org.Hs.eg.db)
library(clusterProfiler)
library(ReactomePA)
final.list <-list()
pathway.list <-list()
for (i in 1:length(list_of_datasets)){
sorted.data <- as.data.frame(list_of_datasets[[i]])
genes.use <- sorted.data$ID[sorted.data$overlap >= 2]
GO_pathway <- enrichGO(gene = genes.use, OrgDb = org.Hs.eg.db, ont = "BP", keyType = "SYMBOL", pAdjustMethod = "BH", pvalueCutoff = 0.05,
qvalueCutoff = 0.05)
GO_res <- simplify(GO_pathway)
GO_simplied_res <- GO_res@result[GO_res@result$p.adjust < 0.05,]
dim(GO_simplied_res)
if(dim(GO_simplied_res)[1]==0){
GO_simplied_res <- as.data.frame(cbind(NO_results= "NO_result_for_GO"))
}
gene.convert <- bitr(genes.use, fromType = "SYMBOL", toType = c("ENSEMBL", "ENTREZID"), OrgDb = org.Hs.eg.db)
reactome_analysis <- enrichPathway(gene=gene.convert$ENTREZID, pvalueCutoff = 0.05, readable=TRUE)
reactome_res <- reactome_analysis@result[reactome_analysis@result$p.adjust <0.05,]
dim(reactome_res)
if(dim(reactome_res)[1]==0){
reactome_res <- as.data.frame(cbind(NO_results= "NO_result_for_reactome"))
}
KEGG_pathway <- enrichKEGG(gene = gene.convert$ENTREZID, organism = "hsa", keyType = "kegg", pAdjustMethod = "BH", pvalueCutoff = 0.05,
qvalueCutoff = 0.05)
KEGG_res <- KEGG_pathway@result[KEGG_pathway@result$p.adjust<0.05,]
dim(KEGG_res)
if(dim( KEGG_res)[1]==0){
KEGG_res <- as.data.frame(cbind(NO_results= "NO_result_for_KEGG"))
}else{
for (j in 1:nrow(KEGG_res)){
tmp_name <- KEGG_res$geneID[j]
tmp_name <- unlist(strsplit(tmp_name,"/"))
index_entizID <- gene.convert$ENTREZID %in% tmp_name
converted_geneSymbol <- gene.convert$SYMBOL[index_entizID]
final_name <- paste0(converted_geneSymbol,collapse = "/")
KEGG_res$geneID[j] <- final_name
}
}
Go_KEGG.bind <- gdata::cbindX(GO_simplied_res,KEGG_res,reactome_res)
tmp.sorted.data <- as.data.frame(list_of_datasets[[i]])
tmp.sorted.data <- tmp.sorted.data[order(tmp.sorted.data$overlap,decreasing = T),]
pathway.list[[i]] <- gdata::cbindX(tmp.sorted.data,Go_KEGG.bind)
names(pathway.list)[i] <- names(list_of_datasets)[i]
}
save(pathway.list,file = "pathway.list.Rdata")
save(list_of_datasets,file = "list_of_datasets.Rdata")
load("pathway.list.Rdata")
load("list_of_datasets.Rdata")
xlsx::write.xlsx(pathway.list[[1]], file = "1_vs_3_remove_overlap_with_cell_number_with_pathway_overlap2.xlsx",sheetName= names(pathway.list)[1],row.names = FALSE)
for (sheet.idx in 2:length(list_of_datasets)){
xlsx::write.xlsx(pathway.list[[sheet.idx]], file = "1_vs_3_remove_overlap_with_cell_number_with_pathway_overlap2.xlsx",sheetName= names(pathway.list)[sheet.idx],append = T,row.names = FALSE)
}
xlsx::write.xlsx(pathway.list[[9]], file = "1_vs_3_remove_overlap_with_cell_number_with_pathway9_overlap2.xlsx",sheetName= names(pathway.list)[9],row.names = FALSE)
xlsx::write.xlsx(pathway.list[[10]], file = "1_vs_3_remove_overlap_with_cell_number_with_pathway10_overlap2.xlsx",sheetName= names(pathway.list)[10],row.names = FALSE)
xlsx::write.xlsx(pathway.list[[11]], file = "1_vs_3_remove_overlap_with_cell_number_with_pathway11_overlap2.xlsx",sheetName= names(pathway.list)[11],row.names = FALSE)
xlsx::write.xlsx(pathway.list[[12]], file = "1_vs_3_remove_overlap_with_cell_number_with_pathway12_overlap2.xlsx",sheetName= names(pathway.list)[12],row.names = FALSE)
#################################################
#################################################
# AD: 19 (Layer AT8+/WFS1+) vs 20 (Layer AT8+/WFS1-) each layer
# 2-5 control1
# 18-64 control2
# 2-8 ad1
# T4857 ad2
require(openxlsx)
library(xlsx)
library(ComplexHeatmap)
`%notin%` <- Negate(`%in%`)
seurat.object.list <- list(AD1 = my.2_8.object, AD2 = my.T4857.object)
tmp.merge.object <- merge(seurat.object.list[[1]],seurat.object.list[[2]])
tmp.layer.info <- unique(tmp.merge.object$Layer)
tmp.layer.info <- tmp.layer.info[c(-7,-8)]
list_of_datasets <- list()
for (j in seq_len(length(tmp.layer.info))){
DEG_list <- list()
cell_number <- list()
for (i in 1:2){
tmp.object <-seurat.object.list[[i]]
tmp.layer <- tmp.layer.info[j]
# subset spots
layer.spot <- names(tmp.object$Layer)[tmp.object$Layer ==tmp.layer]
AD.WFS1_pos.spot <- names(tmp.object$WFS1)[tmp.object$WFS1 == "WFS1"]
AD.WFS1_neg.spot <- names(tmp.object$WFS1)[tmp.object$WFS1 == "Not_defined"]
AD.Tau_pos.spot <- names(tmp.object$Tau46.TauC)[tmp.object$Tau46.TauC == "Tau"]
AD.Tau_neg.spot <- names(tmp.object$Tau46.TauC)[tmp.object$Tau46.TauC == "Not_defined"]
AD.AT8_pos.spot <- names(tmp.object$AT8)[tmp.object$AT8 == "AT8"]
AD.AT8_neg.spot <- names(tmp.object$AT8)[tmp.object$AT8 == "Not_defined"]
# select group
cell.group1 <- intersect(intersect(AD.WFS1_pos.spot,AD.AT8_pos.spot),layer.spot)
cell.group2 <- intersect(intersect(AD.WFS1_neg.spot,AD.AT8_pos.spot),layer.spot)
tmp.DEG_list <- FindMarkers(tmp.object, ident.1 = cell.group1, ident.2 = cell.group2, logfc.threshold = 0.10 , min.pct = 0.25,min.cells.group = 1)
tmp.DEG_list <- tmp.DEG_list[abs(tmp.DEG_list$avg_logFC) > 0.25,]
DEG_list[[i]] <- tmp.DEG_list
cell_number[[i]] <- c(length(cell.group1),length(cell.group2))
cell.group1.name <- gsub("_[1,2,3,4,5,6]$","",cell.group1)
cell.group2.name <- gsub("_[1,2,3,4,5,6]$","",cell.group2)
tmp.AD.ID <- unique(tmp.object$patientID)
AD.cell.name <- names(tmp.merge.object$patient)[ tmp.merge.object$patientID == tmp.AD.ID]
AD.cell.name <- gsub("_[1,2,3,4,5,6]$","",AD.cell.name )
AD_cellGroup1_barcode <- as.data.frame(cbind(ID = AD.cell.name, barcode = "unselected"))
AD_cellGroup2_barcode <- as.data.frame(cbind(ID = AD.cell.name,barcode = "unselected"))
AD_cellGroup1_barcode$barcode[AD_cellGroup1_barcode$ID %in% cell.group1.name] <- paste0("AD", i,": ",tmp.AD.ID,
tmp.layer.info[j],
"_","WFS1+/AT8+")
AD_cellGroup2_barcode$barcode[AD_cellGroup2_barcode$ID %in% cell.group2.name] <- paste0("AD", i,": ",tmp.AD.ID,
tmp.layer.info[j],
"_","WFS1-/AT8+")
write.csv(AD_cellGroup1_barcode,file = paste0("./19_vs_20_spot_csv_for_loupe/",paste0("AD", i,"_",tmp.AD.ID,"_combination_19_",
tmp.layer.info[j],
"_","WFS1+_AT8+.csv")), quote = F,row.names = F)
write.csv(AD_cellGroup2_barcode,file = paste0("./19_vs_20_spot_csv_for_loupe/",paste0("AD", i,"_",tmp.AD.ID,"_combination_20_",
tmp.layer.info[j],
"_","WFS1-_AT8+.csv")),quote = F,row.names = F)
}
print(tmp.layer)
names(DEG_list) <- c("AD1","AD2")
if (dim(DEG_list[[1]])[1]==0 & dim(DEG_list[[2]])[1]==0){
list_of_datasets[[2*j-1]] <- c("no_result")
names(list_of_datasets)[2*j-1] <- paste0(tmp.layer, "_19_up_DEG")
list_of_datasets[[2*j]] <- c("no_result")
names(list_of_datasets)[2*j] <- paste0(tmp.layer, "_19_down_DEG")
next()
}
# up regulate gene
AD_up_gene_list <- sapply(DEG_list, function(x){
tmp.up_name <- rownames(x)[x$avg_logFC >0]
return(tmp.up_name)
})
AD_up_gene_df <- list_to_matrix(AD_up_gene_list)
AD_up_gene_lgfc_df <- AD_up_gene_df
colnames(AD_up_gene_lgfc_df) <- paste0(tmp.layer,"up_logFC_", colnames(AD_up_gene_df),cell_number)
for (n in 1:ncol(AD_up_gene_df)){
if(length(AD_up_gene_df[,n])==0){
next()
}
for (m in 1:nrow(AD_up_gene_df)){
if(AD_up_gene_df[m,n] ==1){
tmp_AD_up_genename <- rownames(AD_up_gene_df)[m]
AD_up_gene_lgfc <- DEG_list[[n]][tmp_AD_up_genename,c("avg_logFC")]
AD_up_gene_lgfc_df[m,n] <- AD_up_gene_lgfc
}
}
}
AD_up_gene_pval_df <- AD_up_gene_df
colnames(AD_up_gene_pval_df) <- paste0(tmp.layer,"up_Adj.pval_", colnames(AD_up_gene_df),cell_number)
for (n in 1:ncol(AD_up_gene_df)){
if(length(AD_up_gene_df[,n])==0){
next()
}
for (m in 1:nrow(AD_up_gene_df)){
if(AD_up_gene_df[m,n] ==1){
tmp_AD_up_genename <- rownames(AD_up_gene_df)[m]
AD_up_gene_lgfc <- DEG_list[[n]][tmp_AD_up_genename,c("p_val_adj")]
AD_up_gene_pval_df[m,n] <- AD_up_gene_lgfc
}
}
}
# down regulated gene
AD_down_gene_list <- sapply(DEG_list, function(x){
tmp.down_name <- rownames(x)[x$avg_logFC <0]
return(tmp.down_name)
})
AD_down_gene_df <- list_to_matrix(AD_down_gene_list)
AD_down_gene_lgfc_df <- AD_down_gene_df
colnames(AD_down_gene_lgfc_df) <- paste0(tmp.layer,"down_logFC_", colnames(AD_down_gene_df),cell_number)
for (n in 1:ncol(AD_down_gene_df)){
if(length(AD_down_gene_df[,n])==0){
next()
}
for (m in 1:nrow(AD_down_gene_df)){
if(AD_down_gene_df[m,n] ==1){
tmp_AD_down_genename <- rownames(AD_down_gene_df)[m]
AD_down_gene_lgfc <- DEG_list[[n]][tmp_AD_down_genename,c("avg_logFC")]
AD_down_gene_lgfc_df[m,n] <- AD_down_gene_lgfc
}
}
}
AD_down_gene_pval_df <- AD_down_gene_df
colnames(AD_down_gene_pval_df) <- paste0(tmp.layer,"down_Adj.pval_", colnames(AD_down_gene_df),cell_number)
for (n in 1:ncol(AD_down_gene_df)){
if(length(AD_down_gene_df[,n])==0){
next()
}
for (m in 1:nrow(AD_down_gene_df)){
if(AD_down_gene_df[m,n] ==1){
tmp_AD_down_genename <- rownames(AD_down_gene_df)[m]
AD_down_gene_lgfc <- DEG_list[[n]][tmp_AD_down_genename,c("p_val_adj")]
AD_down_gene_pval_df[m,n] <- AD_down_gene_lgfc
}
}
}
final_AD_up_matrix <- as.data.frame(cbind(ID= rownames(AD_up_gene_df),
overlap = rowSums(AD_up_gene_df),
AD_up_gene_df,
AD_up_gene_lgfc_df,
AD_up_gene_pval_df))
final_AD_down_matrix <- as.data.frame(cbind(ID= rownames(AD_down_gene_df),
overlap = rowSums(AD_down_gene_df),
AD_down_gene_df,
AD_down_gene_lgfc_df,
AD_down_gene_pval_df))
# remove up and down gene in one comparison
gene_for_up_and_down <- intersect(rownames(final_AD_up_matrix),rownames(final_AD_down_matrix))
final_AD_up_matrix <- final_AD_up_matrix[rownames(final_AD_up_matrix) %notin%gene_for_up_and_down,]
final_AD_down_matrix <- final_AD_down_matrix[rownames(final_AD_down_matrix) %notin% gene_for_up_and_down,]
if(dim(final_AD_up_matrix)[1] == 0){
final_AD_up_matrix <- as.data.frame(rbind(final_AD_up_matrix ,
rep("no_result",ncol(final_AD_up_matrix))))
}
if(dim(final_AD_down_matrix)[1] == 0){
final_AD_down_matrix <- as.data.frame(rbind(final_AD_down_matrix ,
rep("no_result",ncol(final_AD_down_matrix))))
}
list_of_datasets[[2*j-1]] <- final_AD_up_matrix
names(list_of_datasets)[2*j-1] <- paste0(tmp.layer, "_19_up_DEG")
list_of_datasets[[2*j]] <- final_AD_down_matrix
names(list_of_datasets)[2*j] <- paste0(tmp.layer, "_19_down_DEG")
}
library(org.Hs.eg.db)
library(clusterProfiler)
library(ReactomePA)
final.list <-list()
pathway.list <-list()
for (i in 1:length(list_of_datasets)){
if(list_of_datasets[[i]]=="no_result"){
next()
}
sorted.data <- as.data.frame(list_of_datasets[[i]])
genes.use <- sorted.data$ID[sorted.data$overlap >= 1]
GO_pathway <- enrichGO(gene = genes.use, OrgDb = org.Hs.eg.db, ont = "BP", keyType = "SYMBOL", pAdjustMethod = "BH", pvalueCutoff = 0.05,
qvalueCutoff = 0.05)
if(is.null(GO_pathway)){
GO_simplied_res <- as.data.frame(cbind(NO_results= "NO_result_for_GO"))
}else{
GO_res <- simplify(GO_pathway)
GO_simplied_res <- GO_res@result[GO_res@result$p.adjust < 0.05,]
dim(GO_simplied_res)
if(dim(GO_simplied_res)[1]==0){
GO_simplied_res <- as.data.frame(cbind(NO_results= "NO_result_for_GO"))
}
}
gene.convert <- bitr(genes.use, fromType = "SYMBOL", toType = c("ENSEMBL", "ENTREZID"), OrgDb = org.Hs.eg.db)
reactome_analysis <- enrichPathway(gene=gene.convert$ENTREZID, pvalueCutoff = 0.05, readable=TRUE)
if(is.null(reactome_analysis)){
reactome_res <- as.data.frame(cbind(NO_results= "NO_result_for_reactome"))
}else{
reactome_res <- reactome_analysis@result[reactome_analysis@result$p.adjust <0.05,]
dim(reactome_res)
if(dim(reactome_res)[1]==0){
reactome_res <- as.data.frame(cbind(NO_results= "NO_result_for_reactome"))
}
}
KEGG_pathway <- enrichKEGG(gene = gene.convert$ENTREZID, organism = "hsa", keyType = "kegg", pAdjustMethod = "BH", pvalueCutoff = 0.05,
qvalueCutoff = 0.05)
if(is.null(KEGG_pathway)){
KEGG_res <- as.data.frame(cbind(NO_results= "NO_result_for_KEGG"))
}else{
KEGG_res <- KEGG_pathway@result[KEGG_pathway@result$p.adjust<0.05,]
dim(KEGG_res)
if(dim( KEGG_res)[1]==0){
KEGG_res <- as.data.frame(cbind(NO_results= "NO_result_for_KEGG"))
}else{
for (j in 1:nrow(KEGG_res)){
tmp_name <- KEGG_res$geneID[j]
tmp_name <- unlist(strsplit(tmp_name,"/"))
index_entizID <- gene.convert$ENTREZID %in% tmp_name
converted_geneSymbol <- gene.convert$SYMBOL[index_entizID]
final_name <- paste0(converted_geneSymbol,collapse = "/")
KEGG_res$geneID[j] <- final_name
}
}
}
Go_KEGG.bind <- gdata::cbindX(GO_simplied_res,KEGG_res,reactome_res)
tmp.sorted.data <- as.data.frame(list_of_datasets[[i]])
tmp.sorted.data <- tmp.sorted.data[order(tmp.sorted.data$overlap,decreasing = T),]
pathway.list[[i]] <- gdata::cbindX(tmp.sorted.data,Go_KEGG.bind)
names(pathway.list)[i] <- names(list_of_datasets)[i]
}
for(i in 1:length(pathway.list)){
if(is.null(pathway.list[[i]])){
pathway.list[[i]]<- "no_result"
names(pathway.list)[i] <- names(list_of_datasets)[i]
}
}
xlsx::write.xlsx(pathway.list[[1]], file = "19_vs_20_remove_overlap_1.xlsx",sheetName= names(pathway.list)[1],row.names = FALSE)
for (sheet.idx in 2:length(pathway.list)){
xlsx::write.xlsx(pathway.list[[sheet.idx]], file = "19_vs_20_remove_overlap_1.xlsx",sheetName= names(pathway.list)[sheet.idx],append = T,row.names = FALSE)
}
xlsx::write.xlsx(pathway.list[[7]], file = "19_vs_20_remove_overlap_1_7.xlsx",sheetName= names(pathway.list)[7],row.names = FALSE)
xlsx::write.xlsx(pathway.list[[8]], file = "19_vs_20_remove_overlap_1_8.xlsx",sheetName= names(pathway.list)[8],row.names = FALSE)
xlsx::write.xlsx(pathway.list[[9]], file = "19_vs_20_remove_overlap_1_9.xlsx",sheetName= names(pathway.list)[9],row.names = FALSE)
xlsx::write.xlsx(pathway.list[[10]], file = "19_vs_20_remove_overlap_1_10.xlsx",sheetName= names(pathway.list)[10],row.names = FALSE)
xlsx::write.xlsx(pathway.list[[11]], file = "19_vs_20_remove_overlap_1_11.xlsx",sheetName= names(pathway.list)[11],row.names = FALSE)
xlsx::write.xlsx(pathway.list[[12]], file = "19_vs_20_remove_overlap_1_12.xlsx",sheetName= names(pathway.list)[12],row.names = FALSE)
#################################################
# AD: 20 (Layer AT8+/WFS1-) vs (17-20) (17 Layer Tau+/WFS1- - Layer AT8+/WFS1-) each layer
# 2-5 control1
# 18-64 control2
# 2-8 ad1
# T4857 ad2
require(openxlsx)
library(xlsx)
library(ComplexHeatmap)
`%notin%` <- Negate(`%in%`)
seurat.object.list <- list(AD1 = my.2_8.object, AD2 = my.T4857.object)
tmp.merge.object <- merge(seurat.object.list[[1]],seurat.object.list[[2]])
tmp.layer.info <- unique(tmp.merge.object$Layer)
tmp.layer.info <- tmp.layer.info[c(-7,-8)]
tmp.layer.info
list_of_datasets <- list()
for (j in seq_len(length(tmp.layer.info))){
DEG_list <- list()
cell_number <- list()
for (i in 1:2){
tmp.object <-seurat.object.list[[i]]
tmp.layer <- tmp.layer.info[j]
# subset spots
layer.spot <- names(tmp.object$Layer)[tmp.object$Layer ==tmp.layer]
AD.WFS1_pos.spot <- names(tmp.object$WFS1)[tmp.object$WFS1 == "WFS1"]
AD.WFS1_neg.spot <- names(tmp.object$WFS1)[tmp.object$WFS1 == "Not_defined"]
AD.Tau_pos.spot <- names(tmp.object$Tau46.TauC)[tmp.object$Tau46.TauC == "Tau"]
AD.Tau_neg.spot <- names(tmp.object$Tau46.TauC)[tmp.object$Tau46.TauC == "Not_defined"]
AD.AT8_pos.spot <- names(tmp.object$AT8)[tmp.object$AT8 == "AT8"]
AD.AT8_neg.spot <- names(tmp.object$AT8)[tmp.object$AT8 == "Not_defined"]
# select group
# cell group 1: 19
# cell group2: 17-20:
# 17 Layer Tau+/WFS1-
# 20 Layer AT8+/WFS1-
cell.group1 <- intersect(intersect(AD.WFS1_neg.spot,AD.AT8_pos.spot),layer.spot)
cell.group2.sub1 <- intersect(intersect(AD.WFS1_neg.spot,AD.Tau_pos.spot),layer.spot)
cell.group2.sub2 <- intersect(intersect(AD.WFS1_neg.spot,AD.AT8_pos.spot),layer.spot)
cell.group2 <- setdiff(cell.group2.sub1,cell.group2.sub2)
tmp.DEG_list <- FindMarkers(tmp.object, ident.1 = cell.group1, ident.2 = cell.group2, logfc.threshold = 0.10 , min.pct = 0.25,min.cells.group = 1)
tmp.DEG_list <- tmp.DEG_list[abs(tmp.DEG_list$avg_logFC) > 0.25,]
DEG_list[[i]] <- tmp.DEG_list
cell_number[[i]] <- c(length(cell.group1),length(cell.group2))
cell.group1.name <- gsub("_[1,2,3,4,5,6]$","",cell.group1)
cell.group2.name <- gsub("_[1,2,3,4,5,6]$","",cell.group2)
tmp.AD.ID <- unique(tmp.object$patientID)
AD.cell.name <- names(tmp.merge.object$patient)[ tmp.merge.object$patientID == tmp.AD.ID]
AD.cell.name <- gsub("_[1,2,3,4,5,6]$","",AD.cell.name )
AD_cellGroup1_barcode <- as.data.frame(cbind(ID = AD.cell.name, barcode = "unselected"))
AD_cellGroup2_barcode <- as.data.frame(cbind(ID = AD.cell.name,barcode = "unselected"))
AD_cellGroup1_barcode$barcode[AD_cellGroup1_barcode$ID %in% cell.group1.name] <- paste0("AD", i,": ",tmp.AD.ID,
tmp.layer.info[j],
"_","WFS1+/AT8+")
AD_cellGroup2_barcode$barcode[AD_cellGroup2_barcode$ID %in% cell.group2.name] <- paste0("AD", i,": ",tmp.AD.ID,
tmp.layer.info[j],
"_","Tau+/WFS1-/AT8-")
write.csv(AD_cellGroup1_barcode,file = paste0("./20_vs_17M20_spot_csv_for_loupe/",paste0("AD", i,"_",tmp.AD.ID,"_combination_20_",
tmp.layer.info[j],
"_","WFS1-_AT8+.csv")), quote = F,row.names = F)
write.csv(AD_cellGroup2_barcode,file = paste0("./20_vs_17M20_spot_csv_for_loupe/",paste0("AD", i,"_",tmp.AD.ID,"_combination_17M20_",
tmp.layer.info[j],
"_","Tau+_WFS1-_AT8-.csv")),quote = F,row.names = F)
}
print(tmp.layer)
names(DEG_list) <- c("AD1","AD2")
if (dim(DEG_list[[1]])[1]==0 & dim(DEG_list[[2]])[1]==0){
list_of_datasets[[2*j-1]] <- c("no_result")
names(list_of_datasets)[2*j-1] <- paste0(tmp.layer, "_20_up_DEG")
list_of_datasets[[2*j]] <- c("no_result")
names(list_of_datasets)[2*j] <- paste0(tmp.layer, "_20_down_DEG")
next()
}
# up regulate gene
AD_up_gene_list <- sapply(DEG_list, function(x){
tmp.up_name <- rownames(x)[x$avg_logFC >0]
return(tmp.up_name)
})
AD_up_gene_df <- list_to_matrix(AD_up_gene_list)
AD_up_gene_lgfc_df <- AD_up_gene_df
colnames(AD_up_gene_lgfc_df) <- paste0(tmp.layer,"up_logFC_", colnames(AD_up_gene_df),cell_number)
for (n in 1:ncol(AD_up_gene_df)){
if(length(AD_up_gene_df[,n])==0){
next()
}
for (m in 1:nrow(AD_up_gene_df)){
if(AD_up_gene_df[m,n] ==1){
tmp_AD_up_genename <- rownames(AD_up_gene_df)[m]
AD_up_gene_lgfc <- DEG_list[[n]][tmp_AD_up_genename,c("avg_logFC")]
AD_up_gene_lgfc_df[m,n] <- AD_up_gene_lgfc
}
}
}
AD_up_gene_pval_df <- AD_up_gene_df
colnames(AD_up_gene_pval_df) <- paste0(tmp.layer,"up_Adj.pval_", colnames(AD_up_gene_df),cell_number)
for (n in 1:ncol(AD_up_gene_df)){
if(length(AD_up_gene_df[,n])==0){
next()
}
for (m in 1:nrow(AD_up_gene_df)){
if(AD_up_gene_df[m,n] ==1){
tmp_AD_up_genename <- rownames(AD_up_gene_df)[m]
AD_up_gene_lgfc <- DEG_list[[n]][tmp_AD_up_genename,c("p_val_adj")]
AD_up_gene_pval_df[m,n] <- AD_up_gene_lgfc
}
}
}
# down regulated gene
AD_down_gene_list <- sapply(DEG_list, function(x){
tmp.down_name <- rownames(x)[x$avg_logFC <0]
return(tmp.down_name)
})
AD_down_gene_df <- list_to_matrix(AD_down_gene_list)
AD_down_gene_lgfc_df <- AD_down_gene_df
colnames(AD_down_gene_lgfc_df) <- paste0(tmp.layer,"down_logFC_", colnames(AD_down_gene_df),cell_number)
for (n in 1:ncol(AD_down_gene_df)){
if(length(AD_down_gene_df[,n])==0){
next()
}
for (m in 1:nrow(AD_down_gene_df)){
if(AD_down_gene_df[m,n] ==1){
tmp_AD_down_genename <- rownames(AD_down_gene_df)[m]
AD_down_gene_lgfc <- DEG_list[[n]][tmp_AD_down_genename,c("avg_logFC")]
AD_down_gene_lgfc_df[m,n] <- AD_down_gene_lgfc
}
}
}
AD_down_gene_pval_df <- AD_down_gene_df
colnames(AD_down_gene_pval_df) <- paste0(tmp.layer,"down_Adj.pval_", colnames(AD_down_gene_df),cell_number)
for (n in 1:ncol(AD_down_gene_df)){
if(length(AD_down_gene_df[,n])==0){
next()
}
for (m in 1:nrow(AD_down_gene_df)){
if(AD_down_gene_df[m,n] ==1){
tmp_AD_down_genename <- rownames(AD_down_gene_df)[m]
AD_down_gene_lgfc <- DEG_list[[n]][tmp_AD_down_genename,c("p_val_adj")]
AD_down_gene_pval_df[m,n] <- AD_down_gene_lgfc
}
}
}
final_AD_up_matrix <- as.data.frame(cbind(ID= rownames(AD_up_gene_df),
overlap = rowSums(AD_up_gene_df),
AD_up_gene_df,
AD_up_gene_lgfc_df,
AD_up_gene_pval_df))
final_AD_down_matrix <- as.data.frame(cbind(ID= rownames(AD_down_gene_df),
overlap = rowSums(AD_down_gene_df),
AD_down_gene_df,
AD_down_gene_lgfc_df,
AD_down_gene_pval_df))
# remove up and down gene in one comparison
gene_for_up_and_down <- intersect(rownames(final_AD_up_matrix),rownames(final_AD_down_matrix))
final_AD_up_matrix <- final_AD_up_matrix[rownames(final_AD_up_matrix) %notin%gene_for_up_and_down,]
final_AD_down_matrix <- final_AD_down_matrix[rownames(final_AD_down_matrix) %notin% gene_for_up_and_down,]
if(dim(final_AD_up_matrix)[1] == 0){
final_AD_up_matrix <- as.data.frame(rbind(final_AD_up_matrix ,
rep("no_result",ncol(final_AD_up_matrix))))
}
if(dim(final_AD_down_matrix)[1] == 0){
final_AD_down_matrix <- as.data.frame(rbind(final_AD_down_matrix ,
rep("no_result",ncol(final_AD_down_matrix))))
}
list_of_datasets[[2*j-1]] <- final_AD_up_matrix
names(list_of_datasets)[2*j-1] <- paste0(tmp.layer, "_20_up_DEG")
list_of_datasets[[2*j]] <- final_AD_down_matrix
names(list_of_datasets)[2*j] <- paste0(tmp.layer, "_20_down_DEG")
}
library(org.Hs.eg.db)
library(clusterProfiler)
library(ReactomePA)
final.list <-list()
pathway.list <-list()
for (i in 1:length(list_of_datasets)){
if(list_of_datasets[[i]]=="no_result"){
pathway.list[[i]] <- "no_result"
names(pathway.list)[i] <- names(list_of_datasets)[i]
next()
}
sorted.data <- as.data.frame(list_of_datasets[[i]])
genes.use <- sorted.data$ID[sorted.data$overlap >= 1]
GO_pathway <- enrichGO(gene = genes.use, OrgDb = org.Hs.eg.db, ont = "BP", keyType = "SYMBOL", pAdjustMethod = "BH", pvalueCutoff = 0.05,
qvalueCutoff = 0.05)
if(is.null(GO_pathway)){
GO_simplied_res <- as.data.frame(cbind(NO_results= "NO_result_for_GO"))
}else{
GO_res <- simplify(GO_pathway)
GO_simplied_res <- GO_res@result[GO_res@result$p.adjust < 0.05,]
dim(GO_simplied_res)
if(dim(GO_simplied_res)[1]==0){
GO_simplied_res <- as.data.frame(cbind(NO_results= "NO_result_for_GO"))
}
}
gene.convert <- bitr(genes.use, fromType = "SYMBOL", toType = c("ENSEMBL", "ENTREZID"), OrgDb = org.Hs.eg.db)
reactome_analysis <- enrichPathway(gene=gene.convert$ENTREZID, pvalueCutoff = 0.05, readable=TRUE)
if(is.null(reactome_analysis)){
reactome_res <- as.data.frame(cbind(NO_results= "NO_result_for_reactome"))
}else{
reactome_res <- reactome_analysis@result[reactome_analysis@result$p.adjust <0.05,]
dim(reactome_res)
if(dim(reactome_res)[1]==0){
reactome_res <- as.data.frame(cbind(NO_results= "NO_result_for_reactome"))
}
}
KEGG_pathway <- enrichKEGG(gene = gene.convert$ENTREZID, organism = "hsa", keyType = "kegg", pAdjustMethod = "BH", pvalueCutoff = 0.05,
qvalueCutoff = 0.05)
if(is.null(KEGG_pathway)){
KEGG_res <- as.data.frame(cbind(NO_results= "NO_result_for_KEGG"))
}else{
KEGG_res <- KEGG_pathway@result[KEGG_pathway@result$p.adjust<0.05,]
dim(KEGG_res)
if(dim( KEGG_res)[1]==0){
KEGG_res <- as.data.frame(cbind(NO_results= "NO_result_for_KEGG"))
}else{
for (j in 1:nrow(KEGG_res)){
tmp_name <- KEGG_res$geneID[j]
tmp_name <- unlist(strsplit(tmp_name,"/"))
index_entizID <- gene.convert$ENTREZID %in% tmp_name
converted_geneSymbol <- gene.convert$SYMBOL[index_entizID]
final_name <- paste0(converted_geneSymbol,collapse = "/")
KEGG_res$geneID[j] <- final_name
}
}
}
Go_KEGG.bind <- gdata::cbindX(GO_simplied_res,KEGG_res,reactome_res)
tmp.sorted.data <- as.data.frame(list_of_datasets[[i]])
tmp.sorted.data <- tmp.sorted.data[order(tmp.sorted.data$overlap,decreasing = T),]
pathway.list[[i]] <- gdata::cbindX(tmp.sorted.data,Go_KEGG.bind)
names(pathway.list)[i] <- names(list_of_datasets)[i]
}
for(i in 1:length(pathway.list)){
if(is.null(pathway.list[[i]])){
pathway.list[[i]]<- "no_result"
names(pathway.list)[i] <- names(list_of_datasets)[i]
}
}
#save(pathway.list,file = "pathway.list_19_vs_17M20_remove_overlap_1.RData")
#load("pathway.list_19_vs_17M20_remove_overlap_1.RData")
xlsx::write.xlsx(pathway.list[[1]], file = "20_vs_17M20_remove_overlap_1.xlsx",sheetName= names(pathway.list)[1],row.names = FALSE)
for (sheet.idx in 2:length(pathway.list)){
print(sheet.idx)
xlsx::write.xlsx(pathway.list[[sheet.idx]], file = "20_vs_17M20_remove_overlap_1.xlsx",sheetName= names(pathway.list)[sheet.idx],append = T,row.names = FALSE)
}