共享单车数据可视化

共享单车数据可视化

本文是《Heat maps with Divvy data 2》的学习笔记,主要介绍如何使用Divvy共享单车数据绘制日历图。数据量很大(大约1500万个观测值),所以处理起来很耗时间。

虽然bikedata包可以直接用来获取所需数据,但是我没完全搞懂该怎么用它处理数据,所以最后不得不把文件一个个读入处理,不过下载和解压还是可以直接使用bikedata包里面的函数。

如果你不想运行这些非常耗时的语句,可以直接下载处理后的数据集:
bikedata.rds
使用这个数据集的方法是:

R
# Load the pre-aggregated ride data shipped with the post, so the slow
# preparation step can be skipped entirely.
library(io)

divvy.rides <- qread("bikedata.rds")

如果你想学习这个数据是如何被整理的,可以一个个敲下面的代码:

R
library(bikedata)
library(sqldf)
library(io)

# Directory that receives the downloaded archives and the extracted CSV files.
# Paths are built with file.path() instead of calling setwd(), so the
# working directory of the session is left untouched.
data_dir <- "bikedb"

# Download the data first.
dl_bikedata(city = "chicago", data_dir = data_dir)
# Unpack the archives (this function seems to be meant for building a
# database; it is used here for the CSV files it leaves in data_dir).
store_bikedata(city = "chicago", bikedb = "bikedb", data_dir = data_dir)

# The start-time column is written in one of two formats depending on the file.
fmt_mdy <- "%m/%d/%Y %H:%M"
fmt_ymd <- "%Y-%m-%d %H:%M"

# Every trip file together with the format of its start-time column.
trip_files <- c(
  "Divvy_Trips_2013.csv"         = fmt_ymd,
  "Divvy_Trips_2014_Q1Q2.csv"    = fmt_mdy,
  "Divvy_Trips_2014-Q3-07.csv"   = fmt_mdy,
  "Divvy_Trips_2014-Q3-0809.csv" = fmt_mdy,
  "Divvy_Trips_2014-Q4.csv"      = fmt_mdy,
  "Divvy_Trips_2015_07.csv"      = fmt_mdy,
  "Divvy_Trips_2015_08.csv"      = fmt_mdy,
  "Divvy_Trips_2015_09.csv"      = fmt_mdy,
  "Divvy_Trips_2015_Q4.csv"      = fmt_mdy,
  "Divvy_Trips_2015-Q1.csv"      = fmt_mdy,
  "Divvy_Trips_2015-Q2.csv"      = fmt_mdy,
  "Divvy_Trips_2016_04.csv"      = fmt_mdy,
  "Divvy_Trips_2016_05.csv"      = fmt_mdy,
  "Divvy_Trips_2016_06.csv"      = fmt_mdy,
  "Divvy_Trips_2016_Q1.csv"      = fmt_mdy,
  "Divvy_Trips_2016_Q3.csv"      = fmt_mdy,
  "Divvy_Trips_2016_Q4.csv"      = fmt_mdy,
  "Divvy_Trips_2017_Q1.csv"      = fmt_mdy,
  "Divvy_Trips_2017_Q2.csv"      = fmt_mdy,
  "Divvy_Trips_2017_Q3.csv"      = fmt_mdy,
  "Divvy_Trips_2017_Q4.csv"      = fmt_mdy,
  "Divvy_Trips_2018_Q1.csv"      = fmt_ymd,
  "Divvy_Trips_2018_Q2.csv"      = fmt_ymd
)

# Read one trip file and keep only what the daily count needs: the trip id
# (first column) and the calendar date parsed from the start time (second
# column). Selecting the columns by position sidesteps the differing header
# names between files and leaves no NA column names behind.
read_trip_dates <- function(file_name, date_format) {
  raw <- read.csv(file.path(data_dir, file_name), stringsAsFactors = FALSE)
  data.frame(
    trip_id = raw[[1]],
    date = as.Date(raw[[2]], format = date_format)
  )
}

# unname() keeps rbind() from building per-file row names for millions of rows.
df <- do.call(
  rbind,
  unname(Map(read_trip_dates, names(trip_files), trip_files))
)

# Number of trips per day; rows whose start time could not be parsed are dropped.
divvy.rides <- sqldf(
  "select date, count(trip_id) as numtrips from df group by date"
)
divvy.rides <- divvy.rides[!is.na(divvy.rides$date), ]

# Calendar columns referenced by the plotting code (year, week, weekdays).
# NOTE(review): these definitions are a reconstruction; confirm that they
# match the columns of the published bikedata.rds.
day_labels <- weekdays(as.Date("2018-01-01") + 0:6)  # 2018-01-01 is a Monday
divvy.rides$year <- factor(format(divvy.rides$date, "%Y"))
divvy.rides$week <- as.POSIXlt(divvy.rides$date)$yday %/% 7L + 1L
# Reversed levels so that the first weekday ends up at the top of the y axis.
divvy.rides$weekdays <- factor(
  weekdays(divvy.rides$date),
  levels = rev(day_labels)
)

# The steps above take a long time, so save the result right away to avoid
# ever repeating them. The file goes into data_dir, next to the CSV files.
qwrite(divvy.rides, file.path(data_dir, "bikedata.rds"))

# Remove only the helper objects created above; anything else in the
# workspace is left alone.
rm(data_dir, fmt_mdy, fmt_ymd, trip_files, read_trip_dates, df, day_labels)

绘制日历图:

R
library(ggplot2)
library(hrbrthemes)  # provides theme_ipsum()

# Calendar heat map: one tile per day, one row of panels per year.
# Expects divvy.rides to carry the columns week, weekdays, year and numtrips.
ggplot(divvy.rides, aes(x = week, y = weekdays, fill = numtrips)) +
  geom_tile(color = "white", size = 0.1) +
  viridis::scale_fill_viridis(
    name = "骑行次数",
    option = "C",
    direction = 1,
    na.value = "grey93"
  ) +
  facet_grid(year ~ .) +
  # Twelve evenly spaced breaks across the week axis stand in for the months.
  scale_x_continuous(
    expand = c(0, 0),
    breaks = seq(1, 52, length.out = 12),
    labels = c(
      "一月", "二月", "三月", "四月",
      "五月", "六月", "七月", "八月",
      "九月", "十月", "十一月", "十二月"
    )
  ) +
  theme_ipsum(
    plot_title_family = "STSongti-SC-Bold",
    base_family = "STSongti-SC-Bold"
  ) +
  labs(
    title = "Divvy骑行数据日历图",
    subtitle = "2013年~2018年",
    caption = "数据来源:Divvy数据"
  ) +
  theme(
    axis.title = element_blank(),
    axis.text.y = element_text(size = 8)
  )

还可以看一下Divvy周骑行次数的变化:

R
library(dplyr)
library(ggplot2)
library(hrbrthemes)  # provides theme_ipsum()

# Total number of trips for every (year, week) pair.
weekly.rides <- divvy.rides %>%
  group_by(year, week) %>%
  summarise(n = sum(numtrips)) %>%
  ungroup()  # do not leave the result grouped by year

# Weekly totals over the course of a year: faint raw lines plus a loess
# smoother.
# NOTE(review): getting one line per year requires `year` to be discrete
# (factor or character) -- confirm the column type.
ggplot(weekly.rides, aes(x = week, y = n, colour = year)) +
  geom_line(alpha = 0.25) +
  geom_smooth(se = FALSE, method = "loess", alpha = 0.35) +
  theme_ipsum(
    plot_title_family = "STSongti-SC-Bold",
    base_family = "STSongti-SC-Bold"
  ) +
  labs(
    title = "Divvy骑行次数的增长",
    # The data run through 2018, as in the calendar plot and the yearly table.
    subtitle = "2013-2018年按周计数",
    caption = "数据源:Divvy数据\nhttps://www.divvybikes.com/system-data"
  ) +
  scale_y_continuous(labels = scales::comma) +
  theme(axis.title = element_blank())

每年的骑行量:

R
# Add the weekly totals up per year and render the result as a table.
weekly.rides %>%
  group_by(year) %>%
  summarise(count = sum(n)) %>%
  knitr::kable()
| year |   count |
|-----:|--------:|
| 2013 |  759788 |
| 2014 | 2454634 |
| 2015 | 3183439 |
| 2016 | 3595383 |
| 2017 | 3829014 |
| 2018 | 1446826 |
# R

评论

Your browser is out-of-date!

Update your browser to view this website correctly. Update my browser now

×