-
Notifications
You must be signed in to change notification settings - Fork 0
/
05-predicted_plots.R
152 lines (127 loc) · 5.25 KB
/
05-predicted_plots.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# National totals calculated with imputed data
library(dplyr)
library(tidyr)
library(ggplot2)
library(usmap)
# Input (raw) data
# summarize to national totals
raw_sum <- readr::read_csv('derived_data/all_transformed.csv') %>%
group_by(year) %>%
summarize(tot_kha = sum(farm_kha, na.rm = TRUE),
kha_n = sum(!is.na(farm_kha)),
tot_knumber = sum(farm_knumber, na.rm = TRUE),
knumber_n = sum(!is.na(farm_knumber)),
tot_msales = sum(farm_msales, na.rm = TRUE),
msales_n = sum(!is.na(farm_msales)),)
# long
vars <- data.frame(metric = c('area', 'sales', 'number'), label = factor(c("area~(kha~yr^-1)", "number~(k~yr^-1)", "sales~(MM~yr^-1)")))
raw_long <- raw_sum %>%
mutate(year = lubridate::ymd(year, truncated = 2L)) %>%
mutate(area = tot_kha, number = tot_knumber * 1000, sales = tot_msales) %>%
select(-tot_kha:-msales_n) %>%
tidyr::pivot_longer(cols = area:sales,
names_to = 'metric',
values_to = 'value') %>%
group_by(year, metric) %>%
summarise(total = sum(value)) %>%
left_join(vars)
# Imputed data
preds <- readr::read_csv('code/BUGS/mod3/predicted_3a.csv') %>%
mutate(value = case_when(is.na(obs) ~ pred.mean,
!is.na(obs)~ obs),
year = as.factor(Year))
wide_preds <- preds %>%
arrange(year, state) %>%
select(state, year, type, obs) %>%
pivot_wider(names_from = type, values_from = obs) %>%
select(state, year, farm_kha, farm_knumber, farm_msales)
ggplot(data = preds, aes(obs, pred.mean)) +
geom_point(aes(color = year)) +
geom_abline(slope = 1)+
facet_wrap(~type, scales = 'free')
ggplot(data = preds, aes(obs, pred.median)) +
geom_point(aes(color = year)) +
geom_abline(slope = 1)+
facet_wrap(~type, scales = 'free')
ggplot(data = preds, aes(pred.mean, pred.median)) +
geom_point(aes(color = year)) +
geom_abline(slope = 1)+
facet_wrap(~type, scales = 'free')
pred_wide <- wide_preds %>%
pivot_wider(names_from = year, values_from = starts_with('farm'))
imputed <- preds %>%
arrange(year, state) %>%
select(state, year, type, value) %>%
pivot_wider(names_from = type, values_from = value) %>%
select(state, year, farm_kha, farm_knumber, farm_msales)
imputed_wide <- imputed %>%
pivot_wider(names_from = year, values_from = starts_with('farm'))
readr::write_csv(imputed_wide, 'derived_data/imputed_wide.csv')
params <- readr::read_csv('code/BUGS/mod3/params_3a.csv')
slopes <- params %>% filter(Parameter == 'slope') %>%
select(state = State, variable = Variable, slope = mean, significant) %>%
pivot_wider(names_from = variable, values_from = c(slope, significant))
readr::write_csv(slopes, 'derived_data/slopes.csv')
imputed_long <- imputed %>%
tidyr::pivot_longer(cols = farm_kha:farm_msales,
names_to = 'metric',
values_to = 'value')
readr::write_csv(imputed_long, 'derived_data/imputed_long.csv')
## Table 1, Figure 1
vars <- data.frame(metric = c('area', 'sales', 'number'), label = factor(c("area~(kha~yr^-1)", "number~(k~yr^-1)", "sales~(MM~yr^-1)")))
imp_summary <- imputed %>%
mutate(year = lubridate::ymd(year, truncated = 2L)) %>%
mutate(area = farm_kha, number = farm_knumber * 1000, sales = farm_msales) %>%
select(-farm_kha:-farm_msales) %>%
tidyr::pivot_longer(cols = area:sales,
names_to = 'metric',
values_to = 'value') %>%
group_by(year, metric) %>%
summarise(total = sum(value)) %>%
left_join(vars)
imp_summary %>%
pivot_wider(id_cols = metric, names_from = year, values_from = total ) %>%
knitr::kable()
plot_names <- as_labeller(c(area = "Area~(10^3~ha)",
number = "Farm~Number",
sales = "Sales~(million~'USD in 2020')"),
default = label_parsed)
fig2 <- ggplot() +
geom_point(data = imp_summary,
aes(year, total, group = metric)) +
# geom_point(data = raw_long,
# aes(year, total, group = metric, color = "raw")) +
geom_line(data = imp_summary,
aes(year, total, group = metric)) +
facet_wrap(~metric, scales = 'free_y', ncol = 3, labeller = plot_names) +
theme_minimal() +
scale_y_continuous("National total",
limits = c(0, NA), labels = scales::comma) +
theme_minimal() +
theme(axis.title.x = element_blank(),
panel.grid.minor = element_blank())
ggsave(filename = "figures/Fig2_imputed_natl_sum.png",
plot = fig2,
device = "png",
height = 2,
width = 8,
units = "in",
dpi = 600)
imputed_long %>%
group_by(year, metric) %>%
summarise(total = sum(value))
## Imputed data map, total
imp_summary
us_states <- map_data("state")
fig1 <- plot_usmap(data = imputed %>% filter(year == 2019) %>% mutate(number = farm_knumber * 1000),
values = 'number', color = 'gray') +
scale_fill_binned(name = "Farm\nNumber", trans = 'log', breaks = c(10, 50, 100, 500, 1000), low = 'white', high = 'black') +
ggtitle("2019 US total: 16,476") +
theme(legend.position = 'right')
ggsave(filename = "figures/Fig1_2019number_bystate.png",
plot = fig1,
device = "png",
height = 4,
width = 7,
units = "in",
dpi = 600)