R Graphics Cookbook 第七章 Annotations 整理而来。


使用 annotate() 和 text geom(文字对象)添加文本注解,示例如下:在 annotate() 中将类型设为 "text",x、y 分别指定文本的位置,label 是文本的实际内容。annotate() 常用的类型有:

  • text 文本
  • rect 阴影矩形
  • segment 线段
library(ggplot2)

# Base layer: scatter plot of the faithful data set
p <- ggplot(faithful, aes(x = eruptions, y = waiting)) +
  geom_point()

# Add one text label per cluster on top of the base layer
p +
  annotate("text", x = 3, y = 48, label = "Group 1") +
  annotate("text", x = 4.5, y = 66, label = "Group 2")

plot of chunk unnamed-chunk-1


# The same two group labels, drawn in a dark red serif italic face at size 5.
p +
  annotate(
    "text", x = 3, y = 48, label = "Group 1",
    family = "serif", fontface = "italic", colour = "darkred", size = 5
  ) +
  annotate(
    "text", x = 4.5, y = 66, label = "Group 2",
    family = "serif", fontface = "italic", colour = "darkred", size = 5
  )

plot of chunk unnamed-chunk-2


# Both labels use alpha = .1. The annotate() label is drawn normally, while
# the geom_text() label with constant x / y ends up overplotted.
p +
  annotate("text", x = 3, y = 48, label = "Group 1", alpha = .1) + # Normal
  geom_text(x = 4.5, y = 66, label = "Group 2", alpha = .1) # Overplotted

plot of chunk unnamed-chunk-3

我们可以用 Inf 和 -Inf 来把文本放在图像的边缘。
x=-Inf 是左侧,x=Inf 是右侧,y=-Inf 是下方,y=Inf 是上方。如果只这样设置,文字可能会落在绘图区域之外而看不到,还需要通过 vjust 和 hjust 进行调整。

# Inf / -Inf pin a label to the panel edge; hjust / vjust then shift it
# so that the text remains visible.
q <- p +
  annotate("text", x = -Inf, y = Inf, label = "Upper left",
    hjust = -0.2, vjust = 2) +
  annotate("text", x = mean(range(faithful$eruptions)), y = -Inf,
    vjust = -0.4, label = "Bottom middle")
q

plot of chunk unnamed-chunk-4

# Label the remaining three corners on top of q.
q +
  annotate(
    "text", x = Inf, y = Inf, label = "Upper right",
    hjust = 2, vjust = 2
  ) +
  annotate(
    "text", x = -Inf, y = -Inf, label = "Down left",
    hjust = -0.1, vjust = -1
  ) +
  annotate(
    "text", x = Inf, y = -Inf, label = "Down right",
    hjust = 2, vjust = -1
  )

plot of chunk unnamed-chunk-4




ggplot 中的数学表达形式和R基础作图的形式是一样的。

# Normal density curve
p <- ggplot(data.frame(x = c(-3, 3)), aes(x = x)) +
  stat_function(fun = dnorm)

# Use the "text" type of annotate() and set parse = TRUE so the label is
# parsed as a math expression
p +
  annotate("text", x = 2, y = 0.3, parse = TRUE,
    label = "frac(1, sqrt(2 * pi)) * e ^ {-x^2 / 2}")

plot of chunk unnamed-chunk-6

# No parse argument here: compare with the parse = TRUE version of the same label
p + annotate("text", x = 0, y = 0.2, label = "x^2")

plot of chunk unnamed-chunk-6

# Same label with parse = TRUE, so "x^2" is parsed as a math expression
p + annotate("text", x = 0, y = 0.2, parse = TRUE, label = "x^2")

plot of chunk unnamed-chunk-6

# A label made of two parts (plain text followed by a formula), passed to
# the parser as a single unquoted expression
p +
  annotate("text", x = 0, y = 0.05, parse = TRUE, size = 4,
    label = "Function: \t   y==frac(1, sqrt(2*pi)) * e^{-x^2/2}")

plot of chunk unnamed-chunk-6

# Join the two parts with `*`; plain text goes inside single quotes
p +
  annotate("text", x = 0, y = 0.05, parse = TRUE, size = 4,
    label = "'Function:  ' * y==frac(1, sqrt(2*pi)) * e^{-x^2/2}")

plot of chunk unnamed-chunk-6



# Sample data so the snippet runs on its own (x is not defined earlier)
x <- 1:10
plot(x, x)

abline(v = 5) # vertical line at x = 5
abline(h = 5) # horizontal line at y = 5

# Sloped lines: the first argument is the intercept, the second the slope
abline(0, 1) # slope 1, intercept 0
abline(1, 1) # slope 1, intercept 1

plot of chunk unnamed-chunk-7


library(gcookbook) # For the data set

p <- ggplot(heightweight, aes(x = ageYear, y = heightIn, colour = sex)) +
  geom_point()

# geom_hline() adds a horizontal line, geom_vline() a vertical line
p +
  geom_hline(yintercept = 60) +
  geom_vline(xintercept = 14)

plot of chunk unnamed-chunk-8

# Line with an arbitrary slope: intercept sets the intercept, slope the slope
p + geom_abline(intercept = 37.4, slope = 1.75)

plot of chunk unnamed-chunk-8

# Besides passing literal values to xintercept and yintercept,
# the values can also be taken from data
library(plyr) # For the ddply() function

# Mean height for each sex
hw_means <- ddply(heightweight, "sex", summarise, heightIn = mean(heightIn))
hw_means
##   sex heightIn
## 1   f    60.53
## 2   m    62.06
# One dashed horizontal line per row of hw_means, coloured by sex.
p +
  geom_hline(
    aes(yintercept = heightIn, colour = sex),
    data = hw_means,
    linetype = "dashed",
    size = 1
  )

plot of chunk unnamed-chunk-8

# Line types: 0 = blank, 1 = solid, 2 = dashed, 3 = dotted, 4 = dotdash,
# 5 = longdash, 6 = twodash
p +
  geom_vline(xintercept = 12, linetype = "blank") +
  geom_vline(xintercept = 13, linetype = "solid") +
  geom_vline(xintercept = 14, linetype = "dotted") +
  geom_vline(xintercept = 15, linetype = "dotdash") +
  geom_vline(xintercept = 16, linetype = "longdash") +
  geom_vline(xintercept = 17, linetype = "twodash")

plot of chunk unnamed-chunk-8

# Working with a discrete axis
pg <- ggplot(PlantGrowth, aes(x = group, y = weight)) +
  geom_point()
pg

plot of chunk unnamed-chunk-8

# To draw a vertical line on the trt1 column, first look at the levels of
# the discrete variable
levels(PlantGrowth$group)
## [1] "ctrl" "trt1" "trt2"
# From left to right, 'ctrl', 'trt1', 'trt2' can be addressed as 1, 2, 3, ...
pg + geom_vline(xintercept = 2)

plot of chunk unnamed-chunk-8

# A smarter way: look up the position from the level name instead of
# hard-coding the index (this example looks up "ctrl")
pg + geom_vline(xintercept = which(levels(PlantGrowth$group) == "ctrl"))

plot of chunk unnamed-chunk-8


library(gcookbook) # For the data set

p <- ggplot(subset(climate, Source == "Berkeley"),
  aes(x = Year, y = Anomaly10y)) +
  geom_line()

# Add a line segment with the "segment" type, giving the start and end
# positions in x and y
p + annotate("segment", x = 1950, xend = 1980, y = -0.25, yend = -0.25)

plot of chunk unnamed-chunk-9

# arrow() from the grid package adds an arrowhead to a segment; the arrow
# follows the direction given by the x / y start and end positions.
# arrow() can also be used to put end caps on both ends of a segment.
library(grid)

p +
  annotate("segment", x = 1850, xend = 1820, y = -0.8, yend = -0.95,
    colour = "blue", size = 2, arrow = arrow()) +
  annotate("segment", x = 1950, xend = 1980, y = -0.25, yend = -0.25,
    arrow = arrow(ends = "both", angle = 90, length = unit(0.2, "cm")))

plot of chunk unnamed-chunk-9


library(gcookbook) # For the data set

p <- ggplot(subset(climate, Source == "Berkeley"),
  aes(x = Year, y = Anomaly10y)) +
  geom_line()

# Add a shaded region: xmin / xmax / ymin / ymax define the rectangle,
# alpha sets the transparency and fill the fill colour
p +
  annotate("rect", xmin = 1950, xmax = 1980, ymin = -1, ymax = 1,
    alpha = 0.1, fill = "blue")

plot of chunk unnamed-chunk-10


# To highlight one or several items, first create a new variable, then
# give the rows to highlight a different value
pg <- PlantGrowth # Make a copy of the PlantGrowth data
pg$hl <- "no" # Set all to 'no'
pg$hl[pg$group == "trt2"] <- "yes" # If group is 'trt2', set to 'yes'

# guide = "none" hides the legend (guide = FALSE is deprecated in ggplot2)
ggplot(pg, aes(x = group, y = weight, fill = hl)) +
  geom_boxplot() +
  scale_fill_manual(values = c("grey85", "#FFDDCC"), guide = "none")

plot of chunk unnamed-chunk-11

# Alternative without a helper column: map fill to group and list one
# colour per level. guide = "none" hides the legend (guide = FALSE is
# deprecated in ggplot2).
ggplot(PlantGrowth, aes(x = group, y = weight, fill = group)) +
  geom_boxplot() +
  scale_fill_manual(values = c("grey85", "grey85", "#FFDDCC"), guide = "none")

plot of chunk unnamed-chunk-11



# A small example data set
x <- c(1, 2, 3)
y <- c(5, 9, 10)
error1 <- c(0.1, 0.15, 0.14)
error2 <- c(0.1, 0.15, 0.14)

# In the base plotting system, error bars are added with arrows()
plot(x, y, type = "b", col = "blue", pch = 15, lty = 2)

# First compute the y values at both ends of each error bar
y_upper <- y + error1
y_lower <- y - error1

# Add the error bars
arrows(x, y_upper, x, y_lower, length = 0.05, angle = 90, code = 3, pch = 19)

plot of chunk unnamed-chunk-12

library(gcookbook) # For the data set

# Take a subset of the cabbage_exp data for this example
ce <- subset(cabbage_exp, Cultivar == "c39")
ce
##   Cultivar Date Weight     sd  n      se
## 1      c39  d16   3.18 0.9566 10 0.30251
## 2      c39  d20   2.80 0.2789 10 0.08819
## 3      c39  d21   2.74 0.9834 10 0.31098
# In the data above, se is the error column. Error bars on a bar chart:
# geom_errorbar() needs the minimum and maximum y values including the error.
# stat = "identity" makes the bar heights come from Weight; without it
# geom_bar() tries to count cases, which conflicts with the mapped y.
ggplot(ce, aes(x = Date, y = Weight)) +
  geom_bar(stat = "identity", fill = "white", colour = "black") +
  geom_errorbar(aes(ymin = Weight - se, ymax = Weight + se), width = 0.2)

plot of chunk unnamed-chunk-13

# Error bars on a line chart
ggplot(ce, aes(x = Date, y = Weight)) +
  geom_line(aes(group = 1)) +
  geom_point(size = 4) +
  geom_errorbar(aes(ymin = Weight - se, ymax = Weight + se), width = 0.2)

plot of chunk unnamed-chunk-13

##   Cultivar Date Weight     sd  n      se
## 1      c39  d16   3.18 0.9566 10 0.30251
## 2      c39  d20   2.80 0.2789 10 0.08819
## 3      c39  d21   2.74 0.9834 10 0.31098
## 4      c52  d16   2.26 0.4452 10 0.14079
## 5      c52  d20   3.11 0.7909 10 0.25009
## 6      c52  d21   1.47 0.2111 10 0.06675
# position = position_dodge() adjusts where the error bars are placed;
# width adjusts the length of the horizontal caps on the error bars.
# stat = "identity" makes the bar heights come from Weight.
# Bad: error bars whose position has not been adjusted
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(aes(ymin = Weight - se, ymax = Weight + se),
    position = "dodge", width = 0.2)

plot of chunk unnamed-chunk-13

# Good: error bars after adjusting their position with position_dodge(0.9).
# stat = "identity" makes the bar heights come from Weight.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(aes(ymin = Weight - se, ymax = Weight + se),
    position = position_dodge(0.9), width = 0.2)

plot of chunk unnamed-chunk-13

# Adjusting positions is sometimes essential, especially when one plot
# contains several lines
ggplot(cabbage_exp,
  aes(x = Date, y = Weight, colour = Cultivar, group = Cultivar)) +
  geom_errorbar(aes(ymin = Weight - se, ymax = Weight + se),
    width = 0.2, size = 0.25, colour = "black") +
  geom_line() +
  geom_point(size = 2.5)

plot of chunk unnamed-chunk-13

# In the plot above the error bars overlap; adjust them with position_dodge()
pd <- position_dodge(0.3) # Save the dodge spec because we use it repeatedly

ggplot(cabbage_exp,
  aes(x = Date, y = Weight, colour = Cultivar, group = Cultivar)) +
  geom_errorbar(aes(ymin = Weight - se, ymax = Weight + se),
    width = 0.2, size = 0.25, colour = "black", position = pd) +
  geom_line(position = pd) +
  geom_point(position = pd, size = 2.5)
## ymax not defined: adjusting position using y instead## ymax not defined: adjusting position using y instead

plot of chunk unnamed-chunk-13

# Thinner error bar lines with size=0.25, and larger points with size=2.5


# Base plot
p <- ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  facet_grid(. ~ drv)
p

plot of chunk unnamed-chunk-14

# Create a data frame that holds the label for each facet
f_labels <- data.frame(
  drv = c("4", "f", "r"),
  label = c("4wd", "Front", "Rear")
)
f_labels
##   drv label
## 1   4   4wd
## 2   f Front
## 3   r  Rear
# Add the labels with geom_text(), taking the text from f_labels
p + geom_text(x = 6, y = 40, aes(label = label), data = f_labels)

plot of chunk unnamed-chunk-14

# With annotate(), the label text appears in every panel
p + annotate("text", x = 6, y = 42, label = "label text")

plot of chunk unnamed-chunk-14

#' Build labels for the regression equation and the r^2 value
#'
#' Fits `hwy ~ displ` on `dat` and formats the result as expression
#' strings intended for use with `parse = TRUE`.
#'
#' @param dat A data frame with numeric columns `displ` and `hwy`.
#' @return A one-row data frame with character columns `formula` (the
#'   fitted equation) and `r2` (the squared correlation of `displ` and
#'   `hwy`).
lm_labels <- function(dat) {
  mod <- lm(hwy ~ displ, data = dat)
  # %+.2f always prints the sign, so the slope reads as "+1.23" or "-1.23"
  formula <- sprintf(
    "italic(y) == %.2f %+.2f * italic(x)",
    round(coef(mod)[1], 2), round(coef(mod)[2], 2)
  )

  r <- cor(dat$displ, dat$hwy)
  r2 <- sprintf("italic(R^2) == %.2f", r^2)

  data.frame(formula = formula, r2 = r2, stringsAsFactors = FALSE)
}

library(plyr) # For the ddply() function

# One row of labels per drv group
labels <- ddply(mpg, "drv", lm_labels)
labels
##   drv                              formula                  r2
## 1   4 italic(y) == 30.68 -2.88 * italic(x) italic(R^2) == 0.65
## 2   f italic(y) == 37.38 -3.60 * italic(x) italic(R^2) == 0.36
## 3   r italic(y) == 25.78 -0.92 * italic(x) italic(R^2) == 0.04
# Plot with formula and R^2 values
p +
  geom_smooth(method = lm, se = FALSE) +
  geom_text(x = 3, y = 40, aes(label = formula), data = labels,
    parse = TRUE, hjust = 0) +
  geom_text(x = 3, y = 35, aes(label = r2), data = labels,
    parse = TRUE, hjust = 0)

plot of chunk unnamed-chunk-14

# Find r^2 values for each group
labels <- ddply(mpg, "drv", summarise, r2 = cor(displ, hwy)^2)

# Format the numeric r^2 as an expression string for parse = TRUE
labels$r2 <- sprintf("italic(R^2) == %.2f", labels$r2)
