pheatmap

ctomclancy

2019-06-21 00:16 IP属地: 湖南

一、创建一个20x10的随机矩阵（20行、10列）

test = matrix(rnorm(200), 20, 10)
test[1:10, seq(1, 10, 2)] = test[1:10, seq(1, 10, 2)] + 3
test[11:20, seq(2, 10, 2)] = test[11:20, seq(2, 10, 2)] + 2
test[15:20, seq(2, 10, 2)] = test[15:20, seq(2, 10, 2)] + 4
colnames(test) = paste("Test", 1:10, sep = "")
rownames(test) = paste("Gene", 1:20, sep = "")

１.基本作图
pheatmap(test) #基本图
pheatmap(test, kmeans_k = 2)  #将行聚为2类
pheatmap(test, scale = "row", clustering_distance_rows = "correlation") #标准化
pheatmap(test, color = colorRampPalette(c("navy", "white", "firebrick3"))(10)) #设置颜色，后面括号里的数字表示梯度
pheatmap(test, cluster_row = FALSE) #是否显示行的聚类
pheatmap(test, legend = FALSE) #是否显示图例

scale是指对数值进行均一化处理，在基因表达量的数据中，有些基因表达量极低，有些基因表达量极高，因此把每个基因在不同处理和重复中的数据转换为平均值为0，方差为1的数据，可以看出每个基因在某个处理和重复中表达量是高还是低，一般选择做row均一化。

2.显示色块的数值、文本
pheatmap(test, display_numbers = TRUE) #基本用法
pheatmap(test, display_numbers = TRUE, number_format = "%.1e") # "%.1e"用科学计数法显示保留1位小数；"%.3f"用小数显示保留３位小数
pheatmap(test, display_numbers = matrix(ifelse(test > 5, "*", ""), nrow(test))) #以*赋值矩阵> 5的色块
pheatmap(test, cluster_row = FALSE, legend_breaks = -1:4, legend_labels = c("0", "1e-4", "1e-3", "1e-2", "1e-1", "1")) #legend_breaks设置图例的显示范围，默认间隔为1；legend_labels重写刻度的标签, 需与legend_breaks同时使用。

3.调整色块或文本大小
pheatmap(test, cellwidth = 15, cellheight = 12, main = "Example heatmap", fontsize = 8, filename = "test.pdf")
#参数分别表示：色块的宽度、色块的高度、标题、行列名
#及图例字体的大小、保存为当前工作目录下的图片的文件名

4.行列注释
首先创建annotation_col 与annotation_row
annotation_col = data.frame(
  CellType = factor(rep(c("CT1", "CT2"), 5)),
  Time = 1:5
)
rownames(annotation_col) = paste("Test", 1:10, sep = "")
annotation_row = data.frame(
  GeneClass = factor(rep(c("Path1", "Path2", "Path3"), c(10, 4, 6)))
)
rownames(annotation_row) = paste("Gene", 1:20, sep = "")

显示行、列注释信息
pheatmap(test)
pheatmap(test, annotation_col = annotation_col)
pheatmap(test, annotation_col = annotation_col, annotation_legend = FALSE)
pheatmap(test, annotation_col = annotation_col, annotation_row = annotation_row)

5.改变列名文本角度
pheatmap(test, annotation_col = annotation_col, annotation_row = annotation_row, angle_col = "45")
pheatmap(test, annotation_col = annotation_col, angle_col = "0")

6.自定义注释色块的颜色
ann_colors = list(
  Time = c("white", "firebrick"),
  CellType = c(CT1 = "#1B9E77", CT2 = "#D95F02"),
  GeneClass = c(Path1 = "#7570B3", Path2 = "#E7298A", Path3 = "#66A61E")
) #注意ann_colors是列表
pheatmap(test, annotation_col = annotation_col, annotation_colors = ann_colors, main = "Title")
pheatmap(test, annotation_col = annotation_col, annotation_row = annotation_row, annotation_colors = ann_colors)
pheatmap(test, annotation_col = annotation_col, annotation_colors = ann_colors[2])

7.切分热图
pheatmap(test, annotation_col = annotation_col, cluster_rows = FALSE, gaps_row = c(10, 14))
pheatmap(test, annotation_col = annotation_col, cluster_rows = FALSE, gaps_row = c(10, 14), cutree_col = 2)
#gaps_row有效的前提是cluster_rows = F；cutree_col有效的前提是cluster_cols = T

8.自定义显示哪些行列的名字
labels_row = c("", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "Il10", "Il15", "Il1b")
pheatmap(test, annotation_col = annotation_col, labels_row = labels_row)

9.用距离矩阵的方法来聚类
drows = dist(test, method = "minkowski")
dcols = dist(t(test), method = "minkowski")
pheatmap(test, clustering_distance_rows = drows, clustering_distance_cols = dcols)
callback = function(hc, mat){
  sv = svd(t(mat))$v[,1]
  dend = reorder(as.dendrogram(hc), wts = sv)
  as.hclust(dend)
}
pheatmap(test, clustering_callback = callback)

取消边框或更改其颜色
pheatmap(test, border_color = NA)
pheatmap(test, border_color = 'red')

三、参数调整：１. 
颜色参数：默认值为colorRampPalette(rev(brewer.pal(n = 7, name ="RdYlBu")))(100)，RdYlBu也就是Rd红色，Yl黄色，Bu蓝色的过渡，则主调色为红黄蓝。颜色大全网址：https://www.color-hex.com/color-names.html

2. 数据变换参数：
scale，是指对数值进行均一化处理，在基因表达量的数据中，有些基因表达量极低，有些基因表达量极高，因此把每个基因在不同处理和重复中的数据转换为平均值为0，方差为1的数据，可以看出每个基因在某个处理和重复中表达量是高还是低，一般选择做row均一化。
clustering_method，表示聚类方法，值可以是hclust的任何一种，如"ward.D","single", "complete", "average", "mcquitty", "median", "centroid", "ward.D2"。
cluster_rows，表示行是否聚类，值可以是FALSE或TRUE
clustering_distance_rows，行距离度量的方法，如欧氏距离
cutree_rows，行聚类数
treeheight_row，行聚类树的高度，默认为50
gaps_row，对行进行分割，使用时不应对行进行聚类（即cluster_rows = FALSE）
cluster_cols，表示列是否聚类，值可以是FALSE或TRUE
clustering_distance_cols，列距离度量的方法
cutree_cols，列聚类数
treeheight_col，列聚类树的高度，默认为50
gaps_col，对列进行分割，使用时不应对列进行聚类（即cluster_cols = FALSE）

３.色度条--就是热图右上角那个小小的长方条
legend，逻辑值，是否显示色度条，默认为T
legend_breaks，显示多少个颜色数值段
legend_labels，对色度条上对应位置的字符进行修改

4.注释条
annotation = NA
annotation_colors，对标签的颜色进行修改
annotation_legend，是否显示标签注释条
annotation_row，数据框格式，用来定义热图所在行的注释条
annotation_names_row，逻辑值，是否显示行标签名称
annotation_col，数据框格式，用来定义热图所在列的注释条
annotation_names_col，逻辑值，是否显示列标签名称

5.其他修改参数
main，设置图的标题
fontsize，是设置所有除主图以外的标签的大小
number_color，小格子中数字的颜色
show_rownames，是否显示行名
fontsize_row，行名的字体大小
labels_row，Y轴坐标名设置（自定义行标签，用于代替行名）
show_colnames，是否显示列名
fontsize_col，列名的字体大小
labels_col，X轴坐标名设置（自定义列标签，用于代替列名）

6.小格子参数设置--热图是由一个个的小四方格子组成的，每一个小格子代表一个基因在一个样本内的表达情况
fontsize_number，小格子中数字大小
display_numbers，是否在小格子中显示数字，逻辑值
number_format，小格子中数字显示形式，但仅有在display_numbers=T时才能使用
na_col，设置小格子为缺失值时的颜色
cellwidth，表示每个小格子的宽度
cellheight，表示每个小格子的高度

7.输出文件参数设置，一般可以直接将画好的热图以png格式或者pdf格式进行写出
filename，输出图画的文件名
width，输出图画的宽度
height，输出图画的高度