• GEO生信数据挖掘(九)肺结核数据-差异分析-WGCNA分析(900行代码整理注释更新版本)


    第六节,我们使用结核病基因数据,做了一个数据预处理的实操案例。例子中结核类型,包括结核,潜隐进展,对照和潜隐,四个类别。第七节延续上个数据,进行了差异分析。 第八节对差异基因进行富集分析。本节进行WGCNA分析。

    WGCNA分析 分段代码(附运行效果图)请查看上节

    运行后效果

    1. rm(list = ls()) ######清除环境数据
    2. #============================================================================
    3. #======================================================================
    4. #+========step0数据预处理和检查,已经做过step0==========================
    5. #+========================================
    6. #+=============================
    7. """
    8. ##############设置工作路径###################
    9. workingDir = "C:/Users/Desktop/GSE152532"############工作路径,可以修改,可以设置为数据存放路径
    10. setwd(workingDir)
    11. getwd()
    12. ################载入R包########################
    13. library(WGCNA)
    14. library(data.table)
    15. #############################导入数据##########################
    16. # The following setting is important, do not omit.
    17. options(stringsAsFactors = FALSE)
    18. #Read in the female liver data set
    19. fpkm = fread("Gene_expression.csv",header=T)##############数据文件名,根据实际修改,如果工作路径不是实际数据路径,需要添加正确的数据路径
    20. # Take a quick look at what is in the data set
    21. dim(fpkm)
    22. names(fpkm)
    23. ####################导入平台数据##########################
    24. library(idmap2)
    25. ids=get_soft_IDs('GPL10558')
    26. head(ids)
    27. #####################将探针ID改为基因ID##########################
    28. fpkm<-merge(fpkm,ids,by='ID')#merge()函数将dat1的探针id与芯片平台探针id相匹配,合并到dat1
    29. library(limma)
    30. fpkm<-avereps(fpkm[,-c(1,99)],ID=fpkm$symbol)#多个探针检测一个基因,合并一起,取其平均值
    31. fpkm<-as.data.frame(fpkm)#将矩阵转换为表格
    32. write.table(fpkm, file="FPKM_genesymbol.csv",row.names=T, col.names=T,quote=FALSE,sep=",")
    33. ###结束后查看文件,进行修改!!!
    34. # 加载自己的数据
    35. # load( "group_data_TB_LTBI.Rdata")
    36. load("exprSet_clean_mean_filter_log1.RData") #exprSet_clean
    37. load( "dataset_TB_LTBI.Rdata")
    38. exprSet_clean = dataset_TB_LTBI
    39. gene_var <- apply(exprSet_clean, 1, var)##### 计算基因的方差
    40. keep_genes <- gene_var >= quantile(gene_var, 0.75)##### 筛选方差较大的基因,选择方差前25%的基因
    41. exprSet_clean <- exprSet_clean[keep_genes,]##### 保留筛选后的基因
    42. dim(exprSet_clean)
    43. save (exprSet_clean,file="方差前25per_TB_LTBI.Rdata")
    44. #######################基于方差筛选基因#################################
    45. fpkm_var <- read.csv("FPKM_genesymbol.csv", header = TRUE, row.names = 1)##### 读入表达矩阵,矩阵的行是基因,列是样本
    46. gene_var <- apply(fpkm_var, 1, var)##### 计算基因的方差
    47. keep_genes <- gene_var >= quantile(gene_var, 0.75)##### 筛选方差较大的基因,选择方差前25%的基因
    48. fpkm_var <- fpkm_var[keep_genes,]##### 保留筛选后的基因
    49. write.table(fpkm_var, file="FPKM_var.csv",row.names=T, col.names=T,quote=FALSE,sep=",")
    50. ###结束后查看文件,进行修改!!!
    51. ##################重新载入数据########################
    52. # The following setting is important, do not omit.
    53. options(stringsAsFactors = FALSE)
    54. #Read in the female liver data set
    55. fpkm = fread("FPKM_var_filter.csv",header=T)##############数据文件名,根据实际修改,如果工作路径不是实际数据路径,需要添加正确的数据路径
    56. # Take a quick look at what is in the data set
    57. dim(fpkm)
    58. names(fpkm)
    59. datExpr0 = as.data.frame(t(fpkm[,-1]))
    60. names(datExpr0) = fpkm$ID;##########如果第一行是ID命名,就写成fpkm$ID
    61. rownames(datExpr0) = names(fpkm[,-1])
    62. ##################check missing value and filter ####################
    63. load("方差前25per_TB_LTBI.Rdata")
    64. datExpr0 = exprSet_clean
    65. ##check missing value
    66. library(WGCNA)
    67. gsg = goodSamplesGenes(datExpr0, verbose = 3)
    68. gsg$allOK
    69. if (!gsg$allOK)
    70. {
    71. # Optionally, print the gene and sample names that were removed:
    72. if (sum(!gsg$goodGenes)>0)
    73. printFlush(paste("Removing genes:", paste(names(datExpr0)[!gsg$goodGenes], collapse = ", ")))
    74. if (sum(!gsg$goodSamples)>0)
    75. printFlush(paste("Removing samples:", paste(rownames(datExpr0)[!gsg$goodSamples], collapse = ", ")))
    76. # Remove the offending genes and samples from the data:
    77. datExpr0 = datExpr0[gsg$goodSamples, gsg$goodGenes]
    78. }
    79. ##filter
    80. #meanFPKM=0.5 ####过滤标准,可以修改
    81. #n=nrow(datExpr0)
    82. #datExpr0[n+1,]=apply(datExpr0[c(1:nrow(datExpr0)),],2,mean)
    83. #datExpr0=datExpr0[1:n,datExpr0[n+1,] > meanFPKM] # for meanFpkm in row n+1 and it must be above what you set--select meanFpkm>opt$meanFpkm(by rp)
    84. filtered_fpkm=t(datExpr0) #行 样本 列 基因
    85. filtered_fpkm=data.frame(rownames(filtered_fpkm),filtered_fpkm)
    86. names(filtered_fpkm)[1]="sample"
    87. write.table(filtered_fpkm, file="FPKM_filter.csv",row.names=F, col.names=T,quote=FALSE,sep="\t")
    88. """
    89. #&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
    90. #+&&&&&&&&&&&&&&&&&&加载数据&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
    91. #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    92. load('DEG_TB_LTBI_step13.Rdata') # DEG,res,all_diff,limma_clean_res,dataset_TB_LTBI_DEG,
    93. #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    94. #+&&&&&&&&&&&&&&&&&&加载数据&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
    95. #&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
    96. library(WGCNA)
    97. #读取目录名称,方便复制粘贴
    98. dir()
    99. #============================================================================
    100. #======================================================================
    101. #+========step1样品聚类step1=================================
    102. #+========================================
    103. #+=============================
    104. ################################样品聚类####################
    105. #这里行是样品名,列为基因名,做转置处理
    106. datExpr = t(dataset_TB_LTBI_DEG)
    107. #初次聚类
    108. sampleTree = hclust(dist(datExpr), method = "average")
    109. # Plot the sample tree: Open a graphic output window of size 20 by 15 inches
    110. # The user should change the dimensions if the window is too large or too small.
    111. #设置绘图窗口
    112. sizeGrWindow(12,9)
    113. pdf(file='1_sampleCluestering_初次聚类检查偏离样本.pdf',width = 12,height = 9)
    114. par(cex=0.6)
    115. par(mar=c(0,4,2,0))
    116. plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5,
    117. cex.axis = 1.5, cex.main = 2)
    118. dev.off()
    119. #============================================================================
    120. #======================================================================
    121. #+========step2切割离群样本=================================
    122. #+========================================
    123. #+=============================
    124. pdf(file='2_sampleCluestering_初次聚类进行切割删除样本.pdf',width = 12,height = 9)
    125. par(cex=0.6)
    126. par(mar=c(0,4,2,0))
    127. plot(sampleTree, main = "Sample clustering to detect outliers ", sub="", xlab="", cex.lab = 1.5,
    128. cex.axis = 1.5, cex.main = 2)
    129. ### 测试画线,可以多次尝试
    130. ##############剪切高度问题,这个根据实际设置后可用
    131. abline(h = 87, col = "red")##剪切高度不确定,故无红线
    132. dev.off()
    133. ### Determine cluster under the line
    134. clust = cutreeStatic(sampleTree, cutHeight = 87, minSize = 10)
    135. table(clust)
    136. #clust
    137. #0 1 2
    138. #5 57 40
    139. #由于本人案例,一刀切出三段,需要保留两段,用了’或‘逻辑运算符号
    140. ### 需要保留哪个,就传如要保留clust编号
    141. keepSamples = (clust==1|clust==2)
    142. #剔除离群样本
    143. datExpr0 = datExpr[keepSamples, ]
    144. #观察新表达矩阵
    145. dim(datExpr0) #[1] 97 2813
    146. #&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
    147. #+&&&&&&&&&&&&&&&&&&数据保存&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
    148. #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    149. save(datExpr0,file='3.聚类后剔除离群样本datExpr0.Rdata')#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    150. #+&&&&&&&&&&&&&&&&&&数据保存&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
    151. #&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
    152. load('datExpr0_cluster_filter.Rdata')
    153. #============================================================================
    154. #======================================================================
    155. #+========step3临床性状数据整理,与新表达矩阵保持一致=================================
    156. #+========================================
    157. #+=============================
    158. #加载自己的临床性状数据
    159. load('design_TB_LTBI.Rdata')
    160. traitData=design
    161. dim(traitData)
    162. # Form a data frame analogous to expression data that will hold the clinical traits.
    163. fpkmSamples = rownames(datExpr0)
    164. traitSamples =rownames(traitData)
    165. #匹配样本名称,性状数据与表达数据保证一致
    166. traitRows = match(fpkmSamples, traitSamples)
    167. datTraits = traitData[traitRows,]
    168. rownames(datTraits)
    169. collectGarbage()
    170. #&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
    171. #+&&&&&&&&&&&&&&&&&&数据保存&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
    172. #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    173. save(datTraits,file='4.剔除离群样本的临床性状数据datTraits.Rdata')#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    174. #+&&&&&&&&&&&&&&&&&&数据保存&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
    175. #&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
    176. #============================================================================
    177. #======================================================================
    178. #+========step4 增加临床性状数据后再次聚类=======================
    179. #+========================================
    180. #+=============================
    181. # Re-cluster samples
    182. sampleTree2 = hclust(dist(datExpr0), method = "average")
    183. # Convert traits to a color representation: white means low, red means high, grey means missing entry
    184. traitColors = numbers2colors(datTraits, signed = FALSE)
    185. # Plot the sample dendrogram and the colors underneath.
    186. #sizeGrWindow(20,20)
    187. pdf(file="5_Sample cluster dendrogram and trait heatmap.pdf",width=12,height=12)
    188. plotDendroAndColors(sampleTree2, traitColors,
    189. groupLabels = names(datTraits),
    190. main = "Sample dendrogram and trait heatmap")
    191. #Error in .plotOrderedColorSubplot(order = order, colors = colors, rowLabels = rowLabels, :
    192. # Length of colors vector not compatible with number of objects in 'order'.
    193. dev.off()
    194. #============================================================================
    195. #======================================================================
    196. #+========step5 构建WGCNA网络=======================
    197. #+========================================
    198. #+=============================
    199. # Allow multi-threading within WGCNA. At present this call is necessary.
    200. # Any error here may be ignored but you may want to update WGCNA if you see one.
    201. # Caution: skip this line if you run RStudio or other third-party R environments.
    202. # See note above.
    203. #检查环境,能开几个线程
    204. enableWGCNAThreads()
    205. # Choose a set of soft-thresholding powers
    206. #设置阈值范围,WGCNA是无标度网络(scale free),节点连结数服从幂次定律分布。(连接数越多核心节点越少)
    207. powers = c(1:15)
    208. # Call the network topology analysis function
    209. #网络拓扑分析
    210. sft = pickSoftThreshold(datExpr0, powerVector = powers, verbose = 5)
    211. # Plot the results:
    212. sizeGrWindow(15, 9)
    213. pdf(file="6_Scale independence选阈值测试过程.pdf",width=9,height=5)
    214. #pdf(file="Rplot03.pdf",width=9,height=5)
    215. par(mfrow = c(1,2))
    216. cex1 = 0.9
    217. # Scale-free topology fit index as a function of the soft-thresholding power
    218. #无标度拓扑拟合指标作为软阈值能力的函数,根据下图结果,挑选合适阈值
    219. plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
    220. xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
    221. main = paste("Scale independence"));
    222. text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
    223. labels=powers,cex=cex1,col="red");
    224. # this line corresponds to using an R^2 cut-off of h
    225. abline(h=0.90,col="red")
    226. # Mean connectivity as a function of the soft-thresholding power
    227. plot(sft$fitIndices[,1], sft$fitIndices[,5],
    228. xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
    229. main = paste("Mean connectivity"))
    230. text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
    231. dev.off()
    232. ######chose the softPower
    233. #选择阈值
    234. softPower =sft$powerEstimate
    235. adjacency = adjacency(datExpr0, power = softPower)
    236. ##### Turn adjacency into topological overlap
    237. #将邻接转换为拓扑重叠
    238. TOM = TOMsimilarity(adjacency);
    239. dissTOM = 1-TOM
    240. # Call the hierarchical clustering function
    241. #无标度网络阈值参数确定后,调用分层聚类函数
    242. #基于TOM的不相似性基因聚类
    243. geneTree = hclust(as.dist(dissTOM), method = "average");
    244. # Plot the resulting clustering tree (dendrogram)
    245. #sizeGrWindow(12,9)
    246. pdf(file="7_Gene clustering on TOM-based dissimilarity基因聚类图.pdf",width=12,height=9)
    247. plot(geneTree, xlab="", sub="", main = "Gene clustering on TOM-based dissimilarity",
    248. labels = FALSE, hang = 0.04)
    249. dev.off()
    250. #聚类模块,最小的基因数量
    251. # We like large modules, so we set the minimum module size relatively high:
    252. minModuleSize = 30
    253. # Module identification using dynamic tree cut:
    254. #使用dynamic tree cut进行模块识别
    255. dynamicMods = cutreeDynamic(dendro = geneTree, distM = dissTOM,
    256. deepSplit = 2, pamRespectsDendro = FALSE,
    257. minClusterSize = minModuleSize);
    258. table(dynamicMods)
    259. # Convert numeric lables into colors
    260. #给不同模块分配颜色
    261. dynamicColors = labels2colors(dynamicMods)
    262. table(dynamicColors)
    263. # Plot the dendrogram and colors underneath
    264. #sizeGrWindow(8,6)
    265. pdf(file="8_带颜色标识的聚类树Dynamic Tree Cut.pdf",width=8,height=6)
    266. plotDendroAndColors(geneTree, dynamicColors, "Dynamic Tree Cut",
    267. dendroLabels = FALSE, hang = 0.03,
    268. addGuide = TRUE, guideHang = 0.05,
    269. main = "Gene dendrogram and module colors")
    270. dev.off()
    271. # Calculate eigengenes
    272. MEList = moduleEigengenes(datExpr0, colors = dynamicColors)
    273. MEs = MEList$eigengenes
    274. # Calculate dissimilarity of module eigengenes
    275. MEDiss = 1-cor(MEs);
    276. # Cluster module eigengenes
    277. METree = hclust(as.dist(MEDiss), method = "average")
    278. # Plot the result
    279. #sizeGrWindow(7, 6)
    280. pdf(file="9_Clustering of module eigengenes.pdf",width=7,height=6)
    281. plot(METree, main = "Clustering of module eigengenes",
    282. xlab = "", sub = "")
    283. MEDissThres = 0.25######剪切高度可修改
    284. # Plot the cut line into the dendrogram
    285. abline(h=MEDissThres, col = "red")
    286. dev.off()
    287. # Call an automatic merging function
    288. #根据前面设置的剪切高度,对模块进行合并
    289. merge = mergeCloseModules(datExpr0, dynamicColors, cutHeight = MEDissThres, verbose = 3)
    290. # The merged module colors
    291. mergedColors = merge$colors
    292. # Eigengenes of the new merged modules:
    293. mergedMEs = merge$newMEs
    294. #sizeGrWindow(12, 9)
    295. pdf(file="10_合并模块后的聚类树merged dynamic.pdf", width = 9, height = 6)
    296. plotDendroAndColors(geneTree, cbind(dynamicColors, mergedColors),
    297. c("Dynamic Tree Cut", "Merged dynamic"),
    298. dendroLabels = FALSE, hang = 0.03,
    299. addGuide = TRUE, guideHang = 0.05)
    300. dev.off()
    301. # Rename to moduleColors
    302. moduleColors = mergedColors
    303. # Construct numerical labels corresponding to the colors
    304. #构建相应颜色的数字标签
    305. colorOrder = c("grey", standardColors(50))
    306. moduleLabels = match(moduleColors, colorOrder)-1
    307. MEs = mergedMEs
    308. # Save module colors and labels for use in subsequent parts
    309. #&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
    310. #+&&&&&&&&&&&&&&&&&&数据保存&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
    311. #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    312. save(datExpr0,datTraits,MEs, TOM, dissTOM, moduleLabels, moduleColors, geneTree, sft, file = "11_networkConstruction-stepByStep.RData")
    313. #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    314. #+&&&&&&&&&&&&&&&&&&数据保存&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
    315. #&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
    316. load("11_networkConstruction-stepByStep.RData")
    317. #=====================================================================================
    318. #===============================================================================
    319. #+========step6 计算模块和临床性状相关系数(核心挑选色块)==============
    320. #+========================================
    321. #+=============================
    322. ##############################relate modules to external clinical triats
    323. # Define numbers of genes and samples
    324. nGenes = ncol(datExpr0)
    325. nSamples = nrow(datExpr0)
    326. #可以修改参数 p值pvalue 更换
    327. moduleTraitCor = cor(MEs, datTraits, use = "p")
    328. moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples)
    329. #sizeGrWindow(10,6)
    330. pdf(file="12_模块和临床形状关系图Module-trait relationships.pdf",width=10,height=6)
    331. # Will display correlations and their p-values
    332. textMatrix = paste(signif(moduleTraitCor, 2), "\n(",
    333. signif(moduleTraitPvalue, 1), ")", sep = "")
    334. dim(textMatrix) = dim(moduleTraitCor)
    335. par(mar = c(6, 8.5, 3, 3))
    336. # Display the correlation values within a heatmap plot #修改性状类型 data.frame
    337. labeledHeatmap(Matrix = moduleTraitCor,
    338. xLabels = names(data.frame(datTraits)),
    339. yLabels = names(MEs),
    340. ySymbols = names(MEs),
    341. colorLabels = FALSE,
    342. colors = greenWhiteRed(50),
    343. textMatrix = textMatrix,
    344. setStdMargins = FALSE,
    345. cex.text = 0.5,
    346. zlim = c(-1,1),
    347. main = paste("Module-trait relationships"))
    348. dev.off()
    349. #色块 red相关度 0.75
    350. #=====================================================================================
    351. #===============================================================================
    352. #+========step7 定义包含所有datTraits列的可变权重(MM and GS)==============
    353. #+========================================
    354. #+=============================
    355. #定义包含所有datTraits列的可变权重
    356. ######## Define variable weight containing all column of datTraits
    357. ###MM(gene Module Membership) and GS(gene Trait Significance)
    358. # names (colors) of the modules
    359. modNames = substring(names(MEs), 3)
    360. geneModuleMembership = as.data.frame(cor(datExpr0, MEs, use = "p"))
    361. MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples))
    362. names(geneModuleMembership) = paste("MM", modNames, sep="")
    363. names(MMPvalue) = paste("p.MM", modNames, sep="")
    364. #names of those trait
    365. traitNames=names(data.frame(datTraits))
    366. class(datTraits)
    367. geneTraitSignificance = as.data.frame(cor(datExpr0, datTraits, use = "p"))
    368. GSPvalue = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance), nSamples))
    369. names(geneTraitSignificance) = paste("GS.", traitNames, sep="")
    370. names(GSPvalue) = paste("p.GS.", traitNames, sep="")
    371. ####plot MM vs GS for each trait vs each module
    372. ##########example:royalblue and CK
    373. module="red"
    374. column = match(module, modNames)
    375. moduleGenes = moduleColors==module
    376. trait="TB"
    377. traitColumn=match(trait,traitNames)
    378. sizeGrWindow(7, 7)
    379. #par(mfrow = c(1,1))
    380. verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
    381. abs(geneTraitSignificance[moduleGenes, traitColumn]),
    382. xlab = paste("Module Membership in", module, "module"),
    383. ylab = paste("Gene significance for ",trait),
    384. main = paste("Module membership vs. gene significance\n"),
    385. cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = module)
    386. ######
    387. for (trait in traitNames){
    388. traitColumn=match(trait,traitNames)
    389. for (module in modNames){
    390. column = match(module, modNames)
    391. moduleGenes = moduleColors==module
    392. if (nrow(geneModuleMembership[moduleGenes,]) > 1){####进行这部分计算必须每个模块内基因数量大于2,由于前面设置了最小数量是30,这里可以不做这个判断,但是grey有可能会出现1个gene,它会导致代码运行的时候中断,故设置这一步
    393. #sizeGrWindow(7, 7)
    394. pdf(file=paste("13_", trait, "_", module,"_Module membership vs gene significance.pdf",sep=""),width=7,height=7)
    395. par(mfrow = c(1,1))
    396. verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
    397. abs(geneTraitSignificance[moduleGenes, traitColumn]),
    398. xlab = paste("Module Membership in", module, "module"),
    399. ylab = paste("Gene significance for ",trait),
    400. main = paste("Module membership vs. gene significance\n"),
    401. cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = module)
    402. dev.off()
    403. }
    404. }
    405. }
    406. #####
    407. names(datExpr0)
    408. probes = names(data.frame(datExpr0))
    409. #=====================================================================================
    410. #===============================================================================
    411. #+========step8 导出计算完毕的(MM and GS)==============
    412. #+========================================
    413. #+=============================
    414. #################export GS and MM###############
    415. geneInfo0 = data.frame(probes= probes,
    416. moduleColor = moduleColors)
    417. for (Tra in 1:ncol(geneTraitSignificance))
    418. {
    419. oldNames = names(geneInfo0)
    420. geneInfo0 = data.frame(geneInfo0, geneTraitSignificance[,Tra],
    421. GSPvalue[, Tra])
    422. names(geneInfo0) = c(oldNames,names(geneTraitSignificance)[Tra],
    423. names(GSPvalue)[Tra])
    424. }
    425. for (mod in 1:ncol(geneModuleMembership))
    426. {
    427. oldNames = names(geneInfo0)
    428. geneInfo0 = data.frame(geneInfo0, geneModuleMembership[,mod],
    429. MMPvalue[, mod])
    430. names(geneInfo0) = c(oldNames,names(geneModuleMembership)[mod],
    431. names(MMPvalue)[mod])
    432. }
    433. geneOrder =order(geneInfo0$moduleColor)
    434. geneInfo = geneInfo0[geneOrder, ]
    435. write.table(geneInfo, file = "14_GS_and_MM.xls",sep="\t",row.names=F)
    436. #=====================================================================================
    437. #===============================================================================
    438. #+========step9 基因网络热图进行可视化(非常耗费内存资源)==============
    439. #+========================================
    440. #+=============================
    441. nGenes = ncol(datExpr0)
    442. nSamples = nrow(datExpr0)
    443. # Transform dissTOM with a power to make moderately strong connections more visible in the heatmap
    444. plotTOM = dissTOM^7
    445. # Set diagonal to NA for a nicer plot
    446. diag(plotTOM) = NA
    447. # Call the plot function
    448. sizeGrWindow(9,9) #这个耗电脑内存
    449. pdf(file="15_所有基因数量太多Network heatmap plot_all gene.pdf",width=9, height=9)
    450. TOMplot(plotTOM, geneTree, moduleColors, main = "Network heatmap plot, all genes")
    451. dev.off()
    452. nSelect = 400
    453. # For reproducibility, we set the random seed
    454. set.seed(10)
    455. select = sample(nGenes, size = nSelect)
    456. selectTOM = dissTOM[select, select]
    457. # There's no simple way of restricting a clustering tree to a subset of genes, so we must re-cluster.
    458. selectTree = hclust(as.dist(selectTOM), method = "average")
    459. selectColors = moduleColors[select]
    460. # Open a graphical window
    461. #sizeGrWindow(9,9)
    462. # Taking the dissimilarity to a power, say 10, makes the plot more informative by effectively changing
    463. # the color palette; setting the diagonal to NA also improves the clarity of the plot
    464. plotDiss = selectTOM^7
    465. diag(plotDiss) = NA
    466. pdf(file="16_400个基因试试Network heatmap plot_selected genes.pdf",width=9, height=9)
    467. TOMplot(plotDiss, selectTree, selectColors, main = "Network heatmap plot, selected genes")
    468. dev.off()
    469. #=====================================================================================
    470. #===============================================================================
    471. #+========step10 新模块和临床性状热图 合并和拆分两个版本==============
    472. #+========================================
    473. #+=============================
    474. #sizeGrWindow(5,7.5)
    475. pdf(file="17新模块和临床性状热图_Eigengene dendrogram and Eigengene adjacency heatmap.pdf", width=5, height=7.5)
    476. par(cex = 0.9)
    477. plotEigengeneNetworks(MEs, "", marDendro = c(0,4,1,2), marHeatmap = c(3,4,1,2), cex.lab = 0.8, xLabelsAngle= 90)
    478. dev.off()
    479. #or devide into two parts
    480. # Plot the dendrogram
    481. #sizeGrWindow(6,6);
    482. pdf(file="18_Eigengene dendrogram_2.pdf",width=6, height=6)
    483. par(cex = 1.0)
    484. plotEigengeneNetworks(MEs, "Eigengene dendrogram", marDendro = c(0,4,2,0), plotHeatmaps = FALSE)
    485. dev.off()
    486. pdf(file="19_Eigengene adjacency heatmap_2.pdf",width=6, height=6)
    487. # Plot the heatmap matrix (note: this plot will overwrite the dendrogram plot)
    488. par(cex = 1.0)
    489. plotEigengeneNetworks(MEs, "Eigengene adjacency heatmap", marHeatmap = c(3,4,2,2), plotDendrograms = FALSE, xLabelsAngle = 90)
    490. dev.off()
    491. ###########################Exporting to Cytoscape all one by one ##########################
    492. #=====================================================================================
    493. #===============================================================================
    494. #+========step11 导出每个模块的边和节点关系(Cytoscape 绘图所需)==============
    495. #+========================================
    496. #+=============================
    497. # Select each module
    498. '''
    499. Error in exportNetworkToCytoscape(modTOM, edgeFile = paste("CytoscapeInput-edges-", :
    500. Cannot determine node names: nodeNames is NULL and adjMat has no dimnames.
    501. datExpr0 格式需要dataframe
    502. '''
    503. modules =module #module="red"
    504. for (mod in 1:nrow(table(moduleColors)))
    505. {
    506. modules = names(table(moduleColors))[mod]
    507. # Select module probes
    508. probes = names(data.frame(datExpr0)) #
    509. inModule = (moduleColors == modules)
    510. modProbes = probes[inModule]
    511. modGenes = modProbes
    512. # Select the corresponding Topological Overlap
    513. modTOM = TOM[inModule, inModule]
    514. dimnames(modTOM) = list(modProbes, modProbes)
    515. # Export the network into edge and node list files Cytoscape can read
    516. cyt = exportNetworkToCytoscape(modTOM,
    517. edgeFile = paste("20_CytoscapeInput-edges-", modules , ".txt", sep=""),
    518. nodeFile = paste("20_CytoscapeInput-nodes-", modules, ".txt", sep=""),
    519. weighted = TRUE,
    520. threshold = 0.02,
    521. nodeNames = modProbes,
    522. altNodeNames = modGenes,
    523. nodeAttr = moduleColors[inModule])
    524. }

    WGCNA关系网络的构建完毕,绘图找核心基因,Cytoscape 到底怎么玩?

  • 相关阅读:
    AT800(3000) +昇腾300V 之 第一个例子图片分类
    平移矩阵中的数学思考
    hyperf 十六 session
    为什么PDF打开没有密码,但是不能编辑?
    力扣511. 游戏玩法分析 I
    密码保护工具的编写
    b2b2c o2o 多商家入驻商城 直播带货商城 电子商务
    mysql编译安装教程
    Linux驱动开发:掌握SPI通信机制
    Linux系统无法连接网络的情况
  • 原文地址:https://blog.csdn.net/zzh1464501547/article/details/133915414