前面的步骤请看,模式植物构建orgDb数据库 | 以org.Slycompersicum.eg.db为例 | 一
- 提取KEGG信息 在这里,我们提取的是
<span class="ne-text">KO号列</span>
。
#把Emapper中的query列、KEGG_ko列提出出来
gene2ko <- emapper %>%
dplyr::select(GID = query,
Ko = KEGG_ko) %>%
na.omit()
#将gene2ko的Ko列中的“Ko:”删除,不然后面找pathway会报错
gene2ko$Ko <- gsub("ko:","",gene2ko$Ko)
head(gene2ko)
> head(gene2ko)
GID Ko
1 Solyc00g500001.1.1 -
2 Solyc00g500002.1.1 -
3 Solyc00g500003.1.1 -
4 Solyc00g500019.1.1 -
5 Solyc00g500020.1.1 K02634
6 Solyc00g500021.1.1 K02707,K02713
- 下载KO的json文件,下载地址https://www.genome.jp/kegg-bin/get_htext?ko00001
下载后,放在 <span class="ne-text">路劲文件夹</span>
中。
- 运行一下代码,本地创建
<span class="ne-text">kegg_info.RData</span>
文件
update_kegg <- function(json = "ko00001.json") {
pathway2name <- tibble(Pathway = character(), Name = character())
ko2pathway <- tibble(Ko = character(), Pathway = character())
kegg <- fromJSON(json)
for (a in seq_along(kegg[["children"]][["children"]])) {
A <- kegg[["children"]][["name"]][[a]]
for (b in seq_along(kegg[["children"]][["children"]][[a]][["children"]])) {
B <- kegg[["children"]][["children"]][[a]][["name"]][[b]]
for (c in seq_along(kegg[["children"]][["children"]][[a]][["children"]][[b]][["children"]])) {
pathway_info <- kegg[["children"]][["children"]][[a]][["children"]][[b]][["name"]][[c]]
pathway_id <- str_match(pathway_info, "ko[0-9]{5}")[1]
pathway_name <- str_replace(pathway_info, " \\[PATH:ko[0-9]{5}\\]", "") %>% str_replace("[0-9]{5} ", "")
pathway2name <- rbind(pathway2name, tibble(Pathway = pathway_id, Name = pathway_name))
kos_info <- kegg[["children"]][["children"]][[a]][["children"]][[b]][["children"]][[c]][["name"]]
kos <- str_match(kos_info, "K[0-9]*")[,1]
ko2pathway <- rbind(ko2pathway, tibble(Ko = kos, Pathway = rep(pathway_id, length(kos))))
}
}
}
save(pathway2name, ko2pathway, file = "kegg_info.RData")
}
##'@本地创建kegg_info.RData文件
update_kegg()
##'@载kegg_info.RData
load(file = "kegg_info.RData")
- 根据gene2ko中的ko信息将ko2pathway中的pathway列提取出来
gene2pathway <- gene2ko %>%
left_join(ko2pathway,by = "Ko") %>%
dplyr::select(GID, Pathway) %>%
na.omit()
注意:在此步骤,可能会出现警告提示,大家忽略即可,不要以为是报错信息。
- 在网站https://www.ncbi.nlm.nih.gov/taxonomy上查询物种信息
** 进入网站:**
搜索物种
点进进去
获得相关信息
- 填写对应信息
tax_id="4081"
genus="Solanum"
species="lycompersicum"
- 去重
gene2go <- unique(gene2go)
gene2go <- gene2go[!duplicated(gene2go),]
gene2ko <- gene2ko[!duplicated(gene2ko),]
gene2pathway <- gene2pathway[!duplicated(gene2pathway),]
- 构建OrgDb数据库
makeOrgPackage(gene_info=gene_info,
go=gene2go,
ko=gene2ko,
pathway=gene2pathway,
version="4.0", #版本
maintainer = "du<****@qq.com>", #修改为你的名字和邮箱
author = "du<****@qq.com>", #修改为你的名字和邮箱
outputDir = ".", #输出文件位置
tax_id=tax_id,
genus=genus,
species=species,
goTable="go")
三、GO和KEGG富集分析
3.1 加载自建OrgDb数据库
install.packages("org.Slycompersicum.eg.db/", repos = NULL, type = "sources")
3.2 KEGG富集分析
- 读取DEG_list
DEG_list <- read.csv("DEG_list.csv",header = F)
gene_list <- DEG_list$V1
- KEGG富集分析
pathway2gene <- AnnotationDbi::select(org.Slycompersicum.eg.db,
keys = keys(org.Slycompersicum.eg.db),
columns = c("Pathway","Ko")) %>%
na.omit() %>%
dplyr::select(Pathway, GID)
- 导入 Pathway 与名称对应关系
load("kegg_info.RData")
kegg <- enricher(gene_list,
TERM2GENE = pathway2gene,
TERM2NAME = pathway2name,
pvalueCutoff = 1,
qvalueCutoff = 1,
pAdjustMethod = "BH",
minGSSize = 200)
- 查看KEGG富集结果
kegg_results <- as.data.frame(kegg)
#
head(kegg_results)
##'@导出结果
write.csv(kegg_results,"./KEGG_output.csv")
- 绘制柱状图
pdf("./功能富集/01.kegg.柱状图.pdf",height = 4, width = 6)
barplot(kegg, showCategory= 10,
drop=F,
color="pvalue",
font.size=10)
dev.off()
6. 气泡图
pdf("./功能富集/02.kegg.气泡图.pdf",height = 4, width = 6)
dotplot(kegg)
dev.off()
enrichplot::cnetplot(kegg,circular = F, colorEdge = T)
3.3 GO富集分析
enrich.go <- enrichGO(gene=gene_list,
OrgDb = org.Slycompersicum.eg.db,
# pvalueCutoff = 0.5, ##'@结合自己的需求进行调整
# qvalueCutoff = 0.5, ##'@结合自己的需求进行调整
pAdjustMethod = "BH",
pvalueCutoff = 0.05,
ont = "ALL", ## GO的种类,BP,CC, MF, ALL
keyType = "GID")
- 查看GO富集结果
GO_results <- as.data.frame(enrich.go)
#
head(GO_results)
##'@导出结果
write.csv(kegg_results,"./GO_output.csv")
- 绘制GO富集柱状图
pdf(file = "./功能富集/03.GO_bar.pdf", width = 8, height = 6)
barplot(enrich.go,
drop = TRUE,
showCategory = 10, ## 显示前10个GO term
split="ONTOLOGY") +
scale_y_discrete(labels=function(x) str_wrap(x, width=80))+
facet_grid(ONTOLOGY~., scale='free')
dev.off()
4. GO 气泡图
pdf(file = "./功能富集/04.GO_dotplot.pdf", width = 6, height = 6)
dotplot(enrich.go, showCategory = 10)
dev.off()
- GO网络图
MF_ego <- enrichGO(
gene = gene_list,
keyType = "GID",
OrgDb = org.Slycompersicum.eg.db,
ont = "MF", ##'@GO的种类,BP,CC,MF
pAdjustMethod = "BH",
pvalueCutoff = 0.5##'@结合自己的需求进行调整
)
MF富集网络图
pdf("./功能富集/05.MF_网络图.pdf", width = 8, height = 8)
plotGOgraph(MF_ego)
dev.off()
MF_ego <- pairwise_termsim(MF_ego)
pdf("./功能富集/06.网络图.pdf",width = 8, height = 8)
emapplot(MF_ego, cex.params = list(category_label = 0.8, line = 0.5)) +
scale_fill_continuous(low = "#e06663", high = "#327eba", name = "p.adjust",
guide = guide_colorbar(reverse = TRUE, order=2), trans='log10')
dev.off()
参考资源:
- https://www.jianshu.com/p/693d66681710
- https://www.jianshu.com/p/bb4281e6604e
- https://www.jianshu.com/p/9c9e97167377
- https://bioconductor.org/packages/release/bioc/vignettes/clusterProfiler/inst/doc/clusterProfiler.html#supported-organisms
❝
若我们的教程对你有所帮助,请 <span class="ne-text">点赞+收藏+转发</span>
,这是对我们最大的支持。
往期部分文章
1. 最全WGCNA教程(替换数据即可出全部结果与图形)
2. 精美图形绘制教程
3. 转录组分析教程
4. 转录组下游分析
小杜的生信筆記** ,主要发表或收录生物信息学教程,以及基于R分析和可视化(包括数据分析,图形绘制等);分享感兴趣的文献和学习资料!!**