关于Stata的一些感悟~

昨天,有个用户留言说“stata搞计量统计的,这有啥用?”。

四十年政府工作报告词频变化——Stata图表绘制

  • 首先我搜集了1978年至今的所有政府工作报告。为了便于读入,我把逗号、句号都替换成了换行;为了便于大家模仿,我把这个项目打包上传到了我的仓库,点击下面的链接即可下载:

7818政府工作报告.zip

  • 成果图:


词频统计汇总:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
// Segment every yearly government work report (1978-2018) into words,
// count how often each word occurs in that year, and stack all years into
// a single dataset saved as 7818分词结果.dta with variables:
//   v    - the word (only tokens containing Chinese characters are kept)
//   num  - occurrences of the word in that year's report
//   year - report year, stored as a string
// Requires the user-written command sxpose and one text file per year
// (1978.txt ... 2018.txt) in the working directory.
cd "~/Desktop/7818政府工作报告"
forval year = 1978/2018{
    // One observation per text line; only columns 1-200 of each line are read.
    infix str v 1-200 using `year'.txt, clear
    drop if v == ""

    // For text line i, build variable v_i whose first observation holds the
    // words of that line joined by a dollar-sign delimiter.
    forvalues i = 1/`=_N'{
        gen v`i' = ""
        forvalues j = 1/`=ustrwordcount(`"`=v[`i']'"', "cn")' {
            replace v`i' = v`i' + ustrword(`"`=v[`i']'"', `j', "cn") + "$" in 1
        }
    }

    // Only observation 1 carries the joined strings; prefix every variable
    // with temp so the original text column can be dropped by name.
    keep in 1
    foreach i of varlist _all{
        ren `i' temp`i'
    }
    drop tempv

    // Transpose to one row per text line, then split each row into one
    // variable per word position.
    sxpose, clear
    split _var1, parse($)
    drop _var1

    // Write each word-position column to its own numbered file so the
    // columns can be stacked into one long variable v.
    local m = 1
    foreach i of varlist _all{
        // Clears a preserve left behind by an earlier aborted run, if any.
        cap restore
        preserve
        keep `i'
        ren `i' v
        save `m', replace
        local m = `m' + 1
        restore
    }

    // Stack exactly the files written above. The previous hard-coded upper
    // bound of 40 dropped words beyond the 40th position of a line and left
    // the corresponding files on disk.
    local last = `m' - 1
    use 1, clear
    forvalues i = 2/`last'{
        append using `i'
        erase `i'.dta
    }
    erase 1.dta

    // Keep tokens containing Chinese characters, then reduce to one row per
    // word with its frequency.
    keep if ustrregexm(v, "[\u4e00-\u9fa5]+")
    bysort v: egen num = count(v)
    duplicates drop v, force
    gen year = "`year'"
    save `year', replace
}

// Combine the yearly results and remove the intermediate yearly files.
use 1978, clear
forvalues i = 1979/2018{
    append using `i'
    erase `i'.dta
}
save 7818分词结果, replace
// 1978.dta is an intermediate like the other yearly files; remove it too.
erase 1978.dta

近四十年政府工作报告常青词汇

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
// Two-column chart of ten words' yearly frequencies, drawn as symmetric
// range-area bands. Left column: 发展 经济 工作 社会 人民; right column:
// 建设 改革 企业 国家 政府, drawn by shifting year 48 units to the right.
use 7818分词结果, clear
keep if inlist(v, "发展", "建设", "改革", "经济", "工作") | inlist(v, "企业", "社会", "国家", "人民", "政府")
destring year, replace
gsort v year

// num1 is the mirror image of num, so each band spans -num to +num.
gen num1 = -num

// Move each row of bands to its own baseline (2000, 1500, 1000, 500);
// 人民 and 政府 stay on baseline 0.
replace num = num + 2000 if inlist(v, "发展", "建设")
replace num1 = num1 + 2000 if inlist(v, "发展", "建设")
replace num = num + 1500 if inlist(v, "经济", "改革")
replace num1 = num1 + 1500 if inlist(v, "经济", "改革")
replace num = num + 1000 if inlist(v, "工作", "企业")
replace num1 = num1 + 1000 if inlist(v, "工作", "企业")
replace num = num + 500 if inlist(v, "社会", "国家")
replace num1 = num1 + 500 if inlist(v, "社会", "国家")

// Right-hand column: 1978-2018 is mapped onto 2026-2066.
replace year = year + 48 if inlist(v, "建设", "改革", "企业", "国家", "政府")

// Year captions above (y = 2400) and below (y = -300) both columns; the
// right column shows the real year although it is plotted 48 units later.
// NOTE(review): the documented Stata color name is gray; confirm that grey
// resolves to the intended shade.
local yearlabels
foreach ypos in 2400 -300 {
    foreach shift in 0 48 {
        forvalues yr = 1978(10)2018 {
            local xpos = `yr' + `shift'
            local yearlabels `yearlabels' text(`ypos' `xpos' "`yr'", color(grey*0.2))
        }
    }
}

tw rarea num num1 year if v == "发展", fc(red*0.6) lc(red*0.6) text(2000 1999 "发展", color(red*0.7) size(*2)) || ///
    rarea num num1 year if v == "经济", fc(green*0.6) lc(green*0.6) text(1500 1999 "经济", color(green*0.7) size(*2)) || ///
    rarea num num1 year if v == "工作", fc(orange*0.6) lc(orange*0.6) text(1000 1999 "工作", color(orange*0.7) size(*2)) || ///
    rarea num num1 year if v == "社会", fc(pink*0.6) lc(pink*0.6) text(500 1999 "社会", color(pink*0.7) size(*2)) || ///
    rarea num num1 year if v == "人民", fc(brown*0.6) lc(brown*0.6) text(0 1999 "人民", color(brown*0.7) size(*2)) || ///
    rarea num num1 year if v == "建设", fc(cranberry*0.8) lc(cranberry*0.8) text(2000 2047 "建设", color(cranberry*0.9) size(*2)) || ///
    rarea num num1 year if v == "改革", fc(blue*0.6) lc(blue*0.6) text(1500 2047 "改革", color(blue*0.7) size(*2)) || ///
    rarea num num1 year if v == "企业", fc(dkorange*0.6) lc(dkorange*0.6) text(1000 2047 "企业", color(dkorange*0.7) size(*2)) || ///
    rarea num num1 year if v == "国家", fc(khaki*0.6) lc(khaki*0.6) text(500 2047 "国家", color(khaki*0.7) size(*2)) || ///
    rarea num num1 year if v == "政府", fc(erose*0.6) lc(erose*0.6) text(0 2047 "政府", color(erose*0.7) size(*2)) ||, ///
    xline(1978(5)2018 2026(5)2066, lc(grey*0.1)) yla(, nogrid) ysc(off) xsc(off) leg(off) ///
    plotr(fc(white) lc(white)) xla(, nogrid) sch(s1mono) ///
    `yearlabels' ///
    graphr(margin(8 8 8 18)) text(2660 2022 "近四十年政府工作报告常青词汇", size(large))

// NOTE(review): gre is not a built-in Stata command; presumably a
// user-defined graph-export helper. Confirm it is installed.
gre 近四十年政府工作报告常青词汇

近四十年政府工作报告关键词对比

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
// Single-column chart comparing four word pairs by yearly frequency:
// 农村/城镇, 农业/工业, 服务/制造, 市场/计划. Both words of a pair share one
// baseline and are drawn as low-intensity range-area bands so that the
// overlap between them stays visible.
use 7818分词结果, clear
keep if inlist(v, "城镇", "农村", "工业", "农业", "制造", "服务", "计划", "市场")
destring year, replace
gsort v year

// num1 is the mirror image of num, so each band spans -num to +num.
gen num1 = -num

// Baselines: 600, 400, 200; 市场 and 计划 stay on baseline 0.
replace num = num + 600 if inlist(v, "农村", "城镇")
replace num1 = num1 + 600 if inlist(v, "农村", "城镇")

replace num = num + 400 if inlist(v, "农业", "工业")
replace num1 = num1 + 400 if inlist(v, "农业", "工业")

replace num = num + 200 if inlist(v, "制造", "服务")
replace num1 = num1 + 200 if inlist(v, "制造", "服务")

// Changes from the previous version:
//  - 制造 now uses fintensity(inten20) like every other layer; without it
//    the band was drawn at full intensity on top of 服务.
//  - xline() lists only 1978-2018. The 2026-2066 values belonged to a
//    two-column layout; year is never shifted in this chart.
// NOTE(review): the documented Stata color name is gray; confirm that grey
// resolves to the intended shade.
tw rarea num num1 year if v == "农村", fc(green*0.6) lc(green*0.6) text(680 2010 "农村", color(green*0.7) size(*1.5)) fintensity(inten20) || ///
    rarea num num1 year if v == "城镇", fc(red*0.6) lc(red*0.6) text(550 2000 "城镇", color(red*0.7) size(*1.5)) fintensity(inten20) || ///
    rarea num num1 year if v == "农业", fc(orange*0.6) lc(orange*0.6) text(480 2010 "农业", color(orange*0.7) size(*1.5)) fintensity(inten20) || ///
    rarea num num1 year if v == "工业", fc(pink*0.6) lc(pink*0.6) text(350 2000 "工业", color(pink*0.7) size(*1.5)) fintensity(inten20) || ///
    rarea num num1 year if v == "服务", fc(brown*0.6) lc(brown*0.6) text(280 2010 "服务", color(brown*0.7) size(*1.5)) fintensity(inten20) || ///
    rarea num num1 year if v == "制造", fc(cranberry*0.8) lc(cranberry*0.8) text(150 2000 "制造", color(cranberry*0.9) size(*1.5)) fintensity(inten20) || ///
    rarea num num1 year if v == "市场", fc(blue*0.6) lc(blue*0.6) text(80 2010 "市场", color(blue*0.7) size(*1.5)) fintensity(inten20) || ///
    rarea num num1 year if v == "计划", fc(dkorange*0.6) lc(dkorange*0.6) text(-70 2000 "计划", color(dkorange*0.7) size(*1.5)) fintensity(inten20) ||, ///
    xline(1978 1983 1988 1993 1998 2003 2008 2013 2018, lc(grey*0.1)) yla(, nogrid) ysc(off) xsc(off) leg(off) ///
    plotr(fc(white) lc(white)) xla(, nogrid) sch(s1mono) ///
    text(750 1978 "1978", color(grey*0.2)) ///
    text(750 1988 "1988", color(grey*0.2)) ///
    text(750 1998 "1998", color(grey*0.2)) ///
    text(750 2008 "2008", color(grey*0.2)) ///
    text(750 2018 "2018", color(grey*0.2)) ///
    text(-250 1978 "1978", color(grey*0.2)) ///
    text(-250 1988 "1988", color(grey*0.2)) ///
    text(-250 1998 "1998", color(grey*0.2)) ///
    text(-250 2008 "2008", color(grey*0.2)) ///
    text(-250 2018 "2018", color(grey*0.2)) ///
    graphr(margin(8 8 8 18)) text(850 1998 "近四十年政府工作报告关键词对比", size(large))

// NOTE(review): gre is not a built-in Stata command; presumably a
// user-defined graph-export helper. Confirm it is installed.
gre 近四十年政府工作报告关键词对比

我觉得我的女神很漂亮,想把她的照片当成图表背景,该怎么做?

这个绘图方法来自于 http://www.surveydesign.com.au/tipsgraphs.html ,该网站上给出了两种方法,经过好几天的尝试发现只有第一种比较好用,我们先来试试第一种。(这是用纯Stata绘制,实际上使用一些JavaScript图表库非常容易绘制)

FMFE包

这是为宋豪漳老师的金融工程和金融数学两门课程开发的R包,主要是关于期权定价、交易策略的一些函数和可视化的东西。目前正在开发中,开发期间也可以安装使用。

Your browser is out-of-date!

Update your browser to view this website correctly. Update my browser now

×

keyboard_arrow_up