冲积图与桑基图

冲积图与桑基图

本文是学习:

的笔记,讲述了如何使用ggplot2及其拓展绘制桑基图。

从 alluvial 开始

alluvial 是基于R基础绘图系统的一个包,所以不好用:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
library(alluvial)
tit <- Titanic %>% as_tibble()
tit %>% head(5) %>% knitr::kable(align = "c")

alluvial(
select(tit, Survived, Sex, Age, Class),
freq = tit$n,
col = ifelse(tit$Survived == "Yes", "orange", "grey"),
border = ifelse(tit$Survived == "Yes", "orange", "grey"),
layer = tit$Survived != "Yes",
alpha = 0.8,
blocks = FALSE,
title = "\n联合国难民署承认的难民数量\n"
)

实际上你还遇到中文无法使用的问题,我是将源码下载下来修改源码实现的。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# 冲积图还可以用来可视化面板数据
Refugees %>%
head(5) %>%
knitr::kable(align = "c")

set.seed(39)
(cols <- hsv(h = sample(1:10/10),
s = sample(3:12)/15,
v = sample(3:12)/15))

alluvial_ts(Refugees,
wave = 0.3,
ygap = 5,
col = cols,
plotdir = "centred",
alpha = 0.9,
ylab = "",
border = NA,
axis.cex = 0.8,
leg.cex = 0.8,
leg.col = "white",
title = "\n\n联合国难民署承认的难民数量\n")

ggalluvial 包

这个包我在之前的文章中用过,很不错。你可以传入两种格式的数据,宽数据或者长数据。

宽数据

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# 宽数据
library(ggalluvial)
tit_wide <- as_tibble(Titanic)
head(tit_wide)
# # A tibble: 6 x 5
# Class Sex Age Survived n
# <chr> <chr> <chr> <chr> <dbl>
# 1 1st Male Child No 0
# 2 2nd Male Child No 0
# 3 3rd Male Child No 35
# 4 Crew Male Child No 0
# 5 1st Female Child No 0
# 6 2nd Female Child No 0
ggplot(
tit_wide,
aes(axis1 = Class, axis2 = Sex, axis3 = Age, y = n)
) +
scale_x_discrete(limits = c("Class", "Sex", "Age"),
expand = c(0.1, 0.05)) +
geom_alluvium(aes(fill = Survived)) +
geom_stratum() +
geom_text(stat = "stratum",
label.strata = TRUE,
family = "RobotoSlab-Regular") +
labs(title = "泰坦尼克号上的乘客",
subtitle = "按人口统计学特征和生存与否划分") +
theme(axis.title.y = element_blank(),
axis.title.x = element_blank())

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# 判断数据是否为 Alluvia (wide) 格式
head(as_tibble(UCBAdmissions), n = 12)
# # A tibble: 12 x 4
# Admit Gender Dept n
# <chr> <chr> <chr> <dbl>
# 1 Admitted Male A 512
# 2 Rejected Male A 313
# 3 Admitted Female A 89
# 4 Rejected Female A 19
# 5 Admitted Male B 353
# 6 Rejected Male B 207
# 7 Admitted Female B 17
# 8 Rejected Female B 8
# 9 Admitted Male C 120
# 10 Rejected Male C 205
# 11 Admitted Female C 202
# 12 Rejected Female C 391
is_alluvia_form(as_tibble(UCBAdmissions), axes = 1:3)
# [1] TRUE
as_tibble(UCBAdmissions) %>%
ggplot(
aes(axis1 = Gender, axis2 = Dept, y = n)
) +
geom_alluvium(aes(fill = Admit), width = 1/12) +
geom_stratum(width = 1/12, fill = "black", color = "grey") +
geom_label(stat = "stratum", label.strata = TRUE) +
scale_x_discrete(limits = c("Gender", "Dept"), expand = c(0.05, 0.05)) +
scale_fill_brewer(palette = "Set1") +
labs(title = "加州大学伯克利分校录取与拒绝情况", subtitle = "按性别和部门分类") +
theme(axis.title.y = element_blank())

长数据

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# 长数据
(tit_lone <- to_lodes_form(
as_tibble(Titanic),
key = "Demographic",
axes = 1:3))
# # A tibble: 96 x 5
# Survived n alluvium Demographic stratum
# <chr> <dbl> <int> <fct> <fct>
# 1 No 0 1 Class 1st
# 2 No 0 2 Class 2nd
# 3 No 35 3 Class 3rd
# 4 No 0 4 Class Crew
# 5 No 0 5 Class 1st
# 6 No 0 6 Class 2nd
# 7 No 17 7 Class 3rd
# 8 No 0 8 Class Crew
# 9 No 118 9 Class 1st
# 10 No 154 10 Class 2nd
# # … with 86 more rows
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
(p <- ggplot(
data = tit_lone,
aes(x = Demographic,
stratum = stratum,
alluvium = alluvium,
y = n,
label = stratum)
) +
geom_alluvium(aes(fill = Survived)) +
geom_stratum() +
geom_text(stat = "stratum", family = "RobotoSlab-Regular") +
labs(title = "泰坦尼克号上的乘客",
subtitle = "按人口统计学特征和生存与否划分") +
theme(axis.title.y = element_blank(),
axis.title.x = element_blank()))

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# 判断是否为 Lodes (lone) 格式
(ucb <- to_lodes_form(
as_tibble(UCBAdmissions),
axes = 1:3,
id = "Cohort"))
is_lodes_form(
ucb, key = x,
value = stratum,
id = Cohort,
silent = TRUE
)
# [1] TRUE

data(Refugees, package = "alluvial")
country_regions <- c(
Afghanistan = "中东",
Burundi = "中非",
`Congo DRC` = "中非",
Iraq = "中东",
Myanmar = "东南亚",
Palestine = "中东",
Somalia = "非洲之角",
Sudan = "中非",
Syria = "中东",
Vietnam = "东南亚"
)
Refugees$region <- country_regions[Refugees$country]

ggplot(Refugees,
aes(x = year, y = refugees,
alluvium = country)) +
geom_alluvium(aes(fill = country,
color = country)) +
scale_x_continuous(breaks = seq(2003, 2013, 2)) +
theme(axis.text.x = element_text(angle = -30, hjust = 0)) +
scale_fill_brewer(name = "国家", palette = "Set3") +
scale_color_brewer(name = "国家", palette = "Set3") +
facet_wrap(~ region, scales = "fixed") +
labs(x = "年份", y = "难民数量(万人)",
title = "各国难民数量") +
scale_y_continuous(breaks = c(0, 2000000, 4000000), labels = c(0, 200, 400))

解决标签相互遮盖的问题

解决方案一: ggrepel

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
ggplot(vaccinations,
aes(x = survey, stratum = response,
alluvium = subject, y = freq,
fill = response)) +
scale_x_discrete(expand = c(.4, 0)) +
geom_flow(width = 1/4) +
geom_stratum(alpha = .5, width = 1/4) +
scale_linetype_manual(values = c("blank", "solid")) +
ggrepel::geom_text_repel(
aes(label = ifelse(as.numeric(survey) == 1,
as.character(response), NA)),
stat = "stratum", size = 4, direction = "y", nudge_x = -.5
) +
ggrepel::geom_text_repel(
aes(label = ifelse(as.numeric(survey) == 3,
as.character(response), NA)),
stat = "stratum", size = 4, direction = "y", nudge_x = .5
) +
theme(legend.position = "none") +
ggtitle("vaccination survey responses", "labeled using geom_text_repel()")

解决方案2:ggfittext

1
2
3
4
5
6
7
8
9
10
ggplot(vaccinations,
aes(x = survey, stratum = response,
alluvium = subject, y = freq,
fill = response, label = response)) +
scale_x_discrete(expand = c(.1, 0)) +
geom_flow(width = 1/4) +
geom_stratum(alpha = .5, width = 1/4) +
ggfittext::geom_fit_text(stat = "stratum", width = 1/4, min.size = 4) +
theme(legend.position = "none") +
ggtitle("vaccination survey responses", "labeled using geom_fit_text()")

# R

评论

程振兴

程振兴 @czxa.top
截止今天,我已经在本博客上写了636.8k个字了!

Your browser is out-of-date!

Update your browser to view this website correctly. Update my browser now

×