-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path9_comparison_with_bulkDream.R
136 lines (96 loc) · 4.7 KB
/
9_comparison_with_bulkDream.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
library(tidyverse)
rm(list=ls())
##################################################################
# comparison between between single cell and bulk datasets reanalysis:
##################################################################
outFolder="12_comparison_with_DreamBulk_RogerV2/"
system(paste0("mkdir -p ", outFolder))
# cell type labels
cell.type.annotation<-read_tsv("cell.type.annotation.v2.tsv")
#cell.type.annotation$color[31]<-"#8B0000"
#write_tsv(cell.type.annotation,file="cell.type.annotation.v2.tsv")
#rownames(cell.type.annotation)<-NULL
clust2Names<-cell.type.annotation$Potential.final #c("Stromal-1","Macrophage-2","Macrophage-1","Endothelial-1","Monocyte","CD4_T-cell","Decidual","CD8_T-cell","LED","Stromal-2","ILC","NK-cell","Smooth muscle cells-1","Stromal Fibroblast","Macrophage-3","Endothelial-2","DC","Smooth muscle cells-2","EVT","Plasmablast","Smooth muscle cells","Macrophage-4","B-cell","Unciliated Epithelial")
clust2Names<-paste0(cell.type.annotation$Cluster,":",clust2Names)
names(clust2Names)<-as.character(cell.type.annotation$Cluster)
# single cell DEGs
res <- read_tsv("./7_outputs_DESeq_ConditionsByCluster_with_covidcontrol_res1.0_library/ALL.combined.2022-03-29.tsv")
res <- res %>% separate(cname,c("Location","Cell_type","Origin"),sep="_",remove=FALSE)
res$Cell_type<-clust2Names[res$Cell_type]
res$cname<-paste0(res$Cell_type,"_",res$Origin)
#res <-res %>% filter(!is.na(log2FoldChange) & !is.na(padj))
res <-res %>% filter(!is.na(log2FoldChange))
dim(res)
cluster.Colors<-cell.type.annotation$color
names(cluster.Colors)<-unique(res$cname)
cluster.Colors<-cluster.Colors[1:length(unique(res$cname))]
#celltype_DE<-table(res$Cell_type,res$Location)
#ENTREZID id
eg = bitr(res$gene_name, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
names(eg)[1]="gene_name"
head(eg)
e2g <- eg$gene_name
names(e2g) <- eg$ENTREZID
#colnames(res)[1]<-"gene_name"
res <- res %>% left_join(eg) ## %>% filter(!is.na(ENTREZID))
dim(res)
##Open reslist
bulkres <- list(PPROM=read_csv(file="preterm_adi/DEG_PPROM.csv"),
sPTD=read_csv(file="preterm_adi/DEG_sPTD.csv"))
##Open reslist
bulkres <- list(PPROM_T1_HTA20=read_csv(file="preterm_adi/DEG_PPROM_T1_HTA20.csv"),
PPROM_T1_HG21ST=read_csv(file="preterm_adi/DEG_PPROM_T1_HG21ST.csv"),
PPROM_T2_HTA20=read_csv(file="preterm_adi/DEG_PPROM_T2_HTA20.csv"),
PPROM_T2_HG21ST=read_csv(file="preterm_adi/DEG_PPROM_T2_HG21ST.csv"),
sPTD_T1_HTA20=read_csv(file="preterm_adi/DEG_sPTD_T1_HTA20.csv"),
sPTD_T1_HG21ST=read_csv(file="preterm_adi/DEG_sPTD_T1_HG21ST.csv"),
sPTD_T2_HTA20=read_csv(file="preterm_adi/DEG_sPTD_T2_HTA20.csv"),
sPTD_T2_HG21ST=read_csv(file="preterm_adi/DEG_sPTD_T2_HG21ST.csv"))
##
scres <- mutate(res,cnameLoc=paste(Location,cname,sep="_"),zscore=log2FoldChange/lfcSE)
scnames <- names(table(scres$cnameLoc))
##scres <- res %>% group_by(cnameLoc) %>% group_split()
scnames2 <- scnames[grepl("CAM",scnames)]
cor.mat = sapply(bulkres,function(bulk){
##bulk <- bulkres[[1]]
sapply(scnames2,function(scn){
##scn="CAM_4:Macrophage-2 (Hofbauer)_M"
scdeg <- filter(scres,cnameLoc==scn)
bulk$ENTREZID <- as.character(bulk$ENTREZID)
aux<-inner_join(bulk,scdeg,by="ENTREZID")
ct = cor.test(aux$t,aux$zscore,method = "pearson")
est=ct$estimate
##if(ct$p.value>0.05){est=NA}
est
})
})
##cor.mat2 <- cor.mat[,grepl("PTLDT",colnames(cor.mat))]
rownames(cor.mat) <- gsub(".cor","",rownames(cor.mat))
p.mat = sapply(bulkres,function(bulk){
##bulk <- bulkres[[1]]
sapply(scnames2,function(scn){
##scn="CAM_4:Macrophage-2 (Hofbauer)_M"
scdeg <- filter(scres,cnameLoc==scn)
bulk$ENTREZID <- as.character(bulk$ENTREZID)
aux<-inner_join(bulk,scdeg,by="ENTREZID")
ct = cor.test(aux$t,aux$zscore,method = "pearson")
ct$p.value
})
})
rownames(p.mat) <- gsub(".cor","",rownames(p.mat))
## probably drop the PVBP
pdf(paste0(outFolder,"bulkCorrPretermSplitT1T2Split2.pdf"),width =5,height=8)
pheatmap::pheatmap(cor.mat,cluster_rows = FALSE, cluster_cols = FALSE)
dev.off()
fname=paste0(outFolder,"bulkCorrPretermSplitT1T2Split2.csv")
cnames <- colnames(p.mat)
plong <- p.mat %>% as.data.frame() %>% rownames_to_column("scCellType") %>%
pivot_longer(cols =cnames, values_to="pval")
clong <- cor.mat %>% as.data.frame() %>% rownames_to_column("scCellType") %>%
pivot_longer(cols =cnames, values_to="corr")
mytable <- inner_join(plong,clong) %>% separate(name,c("type","time","cohort"),"_")
write_csv(mytable,fname)
## probably drop the PVBP
# pdf(paste0(outFolder,"bulkCorrPretermJointT1T2.pdf"),width =4,height=8)
# pheatmap::pheatmap(cor.mat,cluster_rows = FALSE, cluster_cols = FALSE)
# dev.off()