篮球比赛输赢概率热力图

篮球比赛输赢概率热力图

本文是学习 zonination/basketball-heatmap 的笔记,绘制了一幅两支队伍比赛胜率的热力图。

这个图需要的数据集:basketball-heatmap.zip
绘图代码:

R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# Read one CSV of game results per season and stack them into a single data
# frame. Seasons are listed newest first (2016 down to 2012) so the row order
# matches reading and appending the files one by one in that order.
season_files <- paste0("games", 2016:2012, ".csv")
games <- do.call(
  rbind,
  lapply(season_files, read.csv, stringsAsFactors = FALSE)
)

library(ggplot2)
library(reshape2)
library(scales)
library(stringr)
# Inspect the raw data in the data viewer, but only in an interactive session:
# the viewer is of no use in a batch run (e.g. Rscript) and may fail there when
# no display is available.
if (interactive()) {
  View(games)
}

# Keep only the last word of each team name (stringr::word with index -1).
games$v.name <- word(games$v.name, -1)
games$h.name <- word(games$h.name, -1)

# Every distinct team name, visitors first and then home teams, in order of
# first appearance. Computed once and reused for both tables.
teams <- unique(c(games$v.name, games$h.name))

# The surrounding parentheses make the assignment also print the team count
# when the script is run at the console.
(n <- length(teams))

# Two n-by-n tables of zeros whose rows and columns are both labelled by team
# name; they are filled in by the game-by-game loop that follows:
#   score - the loop adds 1 at [winner, loser] for each game
#   total - the loop adds 1 at both [visitor, home] and [home, visitor]
score <- as.data.frame(matrix(0, nrow = n, ncol = n))
names(score) <- teams
row.names(score) <- teams
total <- score

# Walk through the games one at a time and update the two team-by-team tables.
# Cells are addressed directly by row/column name instead of building R code as
# text and evaluating it, which breaks on names containing quotes and is slow.
# seq_len() yields an empty sequence when there are no games, so the loop body
# is skipped rather than run on the invalid indices 1 and 0.
for (i in seq_len(nrow(games))) {
  visitor <- games$v.name[i]
  home <- games$h.name[i]

  # Credit the win at [winner, loser]. Anything that is not a strict visitor
  # win (including equal scores) is counted for the home team.
  if (games$v.score[i] > games$h.score[i]) {
    score[visitor, home] <- score[visitor, home] + 1
  } else {
    score[home, visitor] <- score[home, visitor] + 1
  }

  # Count the game in both directions so total stays symmetric.
  total[visitor, home] <- total[visitor, home] + 1
  total[home, visitor] <- total[home, visitor] + 1
}


# Compute the win probability for every ordered pair of teams.
# Pairs with no recorded games are set to NA so they can be dropped below.
total[total == 0] <- NA
df <- score / total

# Reshape to long format: one row per (challenger, variable) pair, where
# challenger is the row team and variable is the column team.
df$challenger <- row.names(score)
df <- melt(df, id = "challenger")

# Attach the number of games per pair. This relies on score and total having
# the same rows and columns in the same order, so both melts line up.
total$challenger <- row.names(total)
df$ngames <- melt(total, id = "challenger")$value

# Order the column team alphabetically and the row team in reverse
# alphabetical order.
df$challenger <- factor(df$challenger)
df$variable <- factor(df$variable, levels = sort(levels(df$variable)))
df$challenger <- factor(
  df$challenger,
  levels = sort(levels(df$challenger), decreasing = TRUE)
)

# Drop the pairs that have no win rate.
df <- df[!is.na(df$value), ]

# Heatmap of win rates: one tile per team pair, filled by win rate and
# labelled with the rate as a percentage rounded to two decimals.
# NOTE(review): base_family = "STSong" presumably has to be an installed font
# for the Chinese labels to render - confirm on the target machine.
ggplot(df, aes(x = variable, y = challenger)) +
  geom_tile(aes(fill = value), color = "white") +
  geom_text(aes(label = percent(round(value, 2))), size = 3.5) +
  scale_fill_distiller(
    "赢的概率",
    palette = "BuGn",
    direction = 1,
    labels = scales::percent_format()
  ) +
  scale_alpha(guide = "none") +
  theme_bw(base_family = "STSong", base_size = 15) +
  labs(
    title = "图:2012年以来篮球比赛赢的概率热力图",
    x = "防守方",
    y = "进攻方"
  ) +
  # Rotate the x-axis team names to vertical and centre the title.
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    plot.title = element_text(hjust = 0.5)
  )

# R

评论

Your browser is out-of-date!

Update your browser to view this website correctly. Update my browser now

×