-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcity_access_somalia.Rmd
430 lines (346 loc) · 14.3 KB
/
city_access_somalia.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
---
title: "City access Somalia"
author: "Marcel Reinmuth"
date: "`r Sys.Date()`"
output:
html_document:
code_folding: hide
toc: true
number_sections: true
toc_float:
collapsed: true
theme: cerulean
editor_options:
chunk_output_type: console
markdown:
wrap: sentence
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
options(scipen=999)
```
# Intro
Quick exploratory analysis
# Setup
This section for reproducibility.
We use R in the version 4.3.2.
In order to be able to reproduce this analysis you need an openrouteservice API key.
Get one [here](https://openrouteservice.org/sign-up/).
Put it in a text file file called `config.txt` in the working directory.
It has no structure, just the key in the first line.
``` txt
YOURAPIKEYXXASDAWDOIJ@(
```
## libraries
Almost all of the libraries can be isntalled via CRAN with the command `install.packages('packagename')`.
The openrouteservice package is not on CRAN and needs to be installed via github.
With the command `remotes::install_github("GIScience/openrouteservice-r");` Also tmap in its most recent version 4 is not yet available on CRAN so we need to download and install it via github too.
```{r import libs, message=F, warning=F}
# main libraries
library(tidyverse)
library(glue)
library(sf)
library(units)
library(ggplot2)
library(tictoc)
library(openrouteservice) # remotes::install_github("GIScience/openrouteservice-r");
library(jsonlite)
library(terra)
library(exactextractr)
library(tmap) # remotes::install_github("r-tmap/tmap")
library(leaflet)
api_key <- readLines("config.txt", n = 1)
# function to adjust a list of coordinates to the location of the closest road segment via the snapping endpoint of ORS. ORS itself only allows for a maximum distance of 350m to snap.
ors_snap <- function(x, rowids, local=F) {
library(httr)
# Define the body
body <- list(
locations = x,
radius = 100000000 # apparently the maximum snapping distance is 5km
)
# Define the headers
headers <- c(
'Accept' = 'application/json, application/geo+json, application/gpx+xml, img/png; charset=utf-8',
'Authorization' = api_key,
'Content-Type' = 'application/json; charset=utf-8'
)
endpoint <- if(local){'http://localhost:8080/ors/v2/snap/driving-car'} else {'https://api.openrouteservice.org/v2/snap/driving-car'}
# Make the POST request
response <- POST( endpoint,
#'https://api.openrouteservice.org/v2/snap/driving-car',
body = body,
add_headers(headers),
encode = "json")
resp_content <- content(response)
extract_data <- function(x, index) {
if (is.null(x)) {
return(data.frame(
rowid = index,
lon = NA,
lat = NA,
snapped_distance = NA
))
} else {
return(data.frame(
rowid = index,
lon = x$location[[1]],
lat = x$location[[2]],
snapped_distance = x$snapped_distance
))
}
}
# Extract data from the list and create a dataframe
df <- do.call(rbind, lapply(seq_along(resp_content$locations), function(i) extract_data(resp_content$locations[[i]], i)))
df$rowid <- rowids
# Convert the dataframe to an sf object
sf_df <- df |>
drop_na(lon, lat) |> # Drop rows with NA coordinates
st_as_sf(coords = c("lon", "lat"), crs = 4326) |> # Define the coordinate columns and set CRS
st_sf()
return(sf_df)
}
options(openrouteservice.url = "http://localhost:8080/ors")
```
# Processing pipeline
We will need Data on
- Cities - greater 50k population in Somalia as our destinations from OSM
- Administrative boundaries of Somalia from HDX
- Population raster - population raster for the whole country from worldpop
Make sure the data is in the working directory.
The cities data is in geojson format, the boundaries in shapefile format and the population raster in tif format.
## Data download
```{r read in data, warning=F, message=F}
cities <- st_read('SOM_pop_50k.geojson', quiet=T)
pop <- rast('som_ppp_2020_UNadj_constrained.tif')
admin <- st_read('som_admbnda_adm2_ocha_20230308.shp', quiet=T)
```
## Sample design and preparation
The destinations are defined as greater cities.
The origins are sampled locations within Somalia.
We decided for regular sampled grid cells of 15km x 15km.
We join the population information from worldpop and snap the centroids of the grid cells to the closest road segment via the ORS API.
```{r prep and sample, message=F, warning=F}
# create 15km grid
grid1km <-
st_make_grid(admin |> st_transform(20538), cellsize = 5000, square = F, flat_topped = T)
# convert to sf df
grid1km <- grid1km |> st_as_sf()
names(grid1km) <- 'geometry'
st_geometry(grid1km) <- 'geometry'
# get rid of all cells that are not within somalia
grid1km <-
grid1km |> st_transform(4326) |> st_filter(admin |> st_union())
# join admin codes
grid1km <-
grid1km |> st_join(admin |> select(ADM2_PCODE),
left = T,
largest = T)
# join pop information
grid1km$pop <- exact_extract(pop, grid1km, 'sum', progress = F)
# add rowid
grid1km <- grid1km |> mutate(rowid = row_number())
# add rowid for the cities too
cities <- cities |> mutate(rowid = row_number())
grid1km <- grid1km |> filter(pop>0)
coord_list <-
lapply(grid1km$geometry |> st_centroid(), function(x)
c(st_coordinates(x)[, 1], st_coordinates(x)[, 2]))
# put the lsit of coords to the snapping endpoint of ors
snapped_coords <- ors_snap(coord_list,rowids = grid1km$rowid, local = T)
# Join snapped_coords to grid1km by rowid and create original centroid
grid1km_snapped <- grid1km |>
left_join(snapped_coords |> tibble(), by = "rowid") |>
mutate(centroid = st_centroid(geometry.x))
# rework names & geom for sf
names(grid1km_snapped)[5:6] <- c('geometry', 'snapped_centroid')
st_geometry(grid1km_snapped) <- 'geometry'
# separate the snapped and non snapped grid cells for somalia
# snapped
grid1km_snapped_notNA <- grid1km_snapped |>
filter(!is.na(snapped_distance))
# not snapped
grid1km_snapped_NA <- grid1km_snapped |>
filter(is.na(snapped_distance))
```
Awesome now we got `r nrow(grid1km_snapped_notNA)` snapped and `r nrow(grid1km_snapped_NA)` not snapped grid cells.
## ORS matrix request and post processing
In the next chunk we will calculate the travel times and distances from each grid cell to each city.
We will use the openrouteservice API for this.
**IMPORTANT**
The openrouteservice api is restricted to 2000 matrix requests per day.
Each request can bear up to 3500 origin x destination combinations.
In our case we do `r nrow(grid1km_snapped_notNA)` \* `r nrow(cities)` combinations.
If we like to increase the number of grid cells and therefore the amount of combinations we can do so by not running one request for one city x all grid cells as we currently do.
Instead we could run multiple requests for one city \* a subset of grid cells.
We only need to ensure that the combination for a single request oes not exceed 3500.
```{r matrix request, cache=T, warning=F, message=F}
# create a list of coordinates for the grid again but from snapped coords
coord_list <-
lapply(grid1km_snapped_notNA$snapped_centroid, function(x)
c(st_coordinates(x)[, 1], st_coordinates(x)[, 2]))
# run through all cities and calculate travel times an distances to each grid cell.
# For 12 cities and 2493 grid cells this is 2493*12 = 29916 results.
for ( i in 1:nrow(cities)) {
# log the current iteration
tic(glue("run {i} / {nrow(cities)}"))
# extract single city by iteration index
cities_subset <- cities[i,]
# get the coordinates as vector
city_coords <-
lapply(cities_subset$geometry, function(x)
c(st_coordinates(x)[, 1], st_coordinates(x)[, 2]))
# add the city coordinates at the first position before our grid centroid coordinates
coord_list_run <- c(city_coords, coord_list)
# run the ors matrix request
res1 = ors_matrix(
coord_list_run, # list of all grid and one single city coordinate at the top
sources = 0, # index of the coordiante which shall serve as origin. In our case the city. If we dont set it we get the matrix for all coords * all cords
metrics = c("duration", "distance"), # we want duration and distance, not only one of it
units = "km", # units is km
api_key = api_key, # api key
output = 'parsed') # output as parsed json
rm(coord_list_run) # get rid of the list of coordinates as we create a new one the next iteration with a different city at the top
# create a dataframe from the results if it is the first iteration, otherwise create and append to the existing one
if (i == 1) {
result_matrix <- data.frame(
city_id = cities_subset$rowid |> rep((res1$distances |> as.vector())[-1] |>
length()),
grid_id = grid1km_snapped_notNA$rowid,
durations = (res1$durations |> as.vector())[-1] |> as.numeric(),
distances = (res1$distances |> as.vector())[-1] |> as.numeric()
) |> tibble()
} else {
result_matrix <- rbind(result_matrix,data.frame(
city_id = cities_subset$rowid |> rep((res1$distances |> as.vector())[-1] |>
length()),
grid_id = grid1km_snapped_notNA$rowid,
durations = (res1$durations |> as.vector())[-1] |> as.numeric(),
distances = (res1$distances |> as.vector())[-1] |> as.numeric()
))
}
toc()
}
```
Awesome we now have the distances from all 12 cities to all grids.
The resulting dataframe has `r nrow(result_matrix)` rows.
Now we want to find the city that is closest from every grid cell in terms of traveltime.
```{r identify closest city, warning=F, message=F}
# Identify the lowest duration from each grid city combination
lowest_duration <- result_matrix |>
group_by(grid_id) |>
slice_min(durations, with_ties = F) |>
select(grid_id, city_id, durations, distances) |>
rename(shortest_duration = durations)
# Join the lowest duration to the grid data
grid1km_snapped_notNA <- grid1km_snapped_notNA |>
left_join(lowest_duration, by = c('rowid' = 'grid_id')) |>
mutate(shortest_duration = shortest_duration / 3600) # convert seconds to hours
names(grid1km_snapped_notNA)[c(6, 7)] <-
c('duration_h', 'distance_km') # account for naming
# join back in the not snapped grid cells. Those will bear a NA in the shortest_duration column
grid_final <- bind_rows(grid1km_snapped_notNA,
grid1km_snapped_NA)
```
# Visualization & assessment
In the following chunks some visualizations are executed to assess the results
## Non spatial
So what is cumulative population/area catchment of each of the cities?
We join the cities again to get the actual names of the cities instead of ids only.
```{r bar chart cities pop, warning=F, message=F}
summary_by_city <- grid_final |> group_by(city_id) |>
summarize(pop=sum(pop, na.rm=T),count=n(), km2=n()*225) |>
st_drop_geometry() |>
left_join (cities |>
st_drop_geometry() |>
select(c(name, rowid)), by=c('city_id'='rowid'))
summary_by_city |>
ggplot(aes(x=name, y=pop, fill=name)) +
geom_bar(stat='identity') +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1)) +
xlab('') + ylab('Population')
```
```{r bar chart cities area, warning=F, message=F}
summary_by_city |>
ggplot(aes(x=name, y=km2, fill=name)) +
geom_bar(stat='identity') +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1)) +
xlab('') + ylab('Area (km^2')
```
`r summary_by_city[nrow(summary_by_city),]$count` grid centroids were not able to snap and therefore create a route to a city.
This results in `r summary_by_city[nrow(summary_by_city),]$km2` km^2 area (`r (summary_by_city[nrow(summary_by_city),]$count / summary_by_city$count |> sum()*100) |> round(2)`%) and `r summary_by_city[nrow(summary_by_city),]$pop` ( `r (summary_by_city[nrow(summary_by_city),]$pop / summary_by_city$pop |> sum()*100) |> round(2)`%) we cannot assess the closest city for.
What is the relation between Traveltime and population for each grid cell?
```{r, , warning=F, message=F}
grid_final |>
ggplot(aes(x = duration_h, y = pop)) +
geom_point() + theme_minimal() +
xlab('Traveltime in h') + ylab('Population')
grid_final |>
ggplot(aes(x = duration_h, y = pop |> log())) +
geom_point() + theme_minimal() +
xlab('Traveltime in h') + ylab('Population log scaled')
```
## Spatial
Maps of population distribution, travel time to the closest city and the closest city with respective administrative boundaries and the greater cities locations.
### Population distribution
```{r map population, warning=F, message=F}
tmap_mode('view')
tm_shape(grid_final, name = 'Population') +
tm_polygons(
'pop',
fill.scale = tm_scale_intervals(
values = '-viridis',
breaks = c(0, 1, 500, 1000, 5000, 10000, 50000, 2000000)
),
fill.title = tm_legend(title = 'Population'),
lwd = 0.1,
border = 'white'
) +
grid_final |> group_by(ADM2_PCODE) |> summarise(ADM2_PCODE = first(ADM2_PCODE)) |>
tm_shape(name = 'Admin(2) boundaries') +
tm_borders(lwd = 0.5) +
tm_shape(cities, name = "Greater City") +
tm_dots()
```
### Traveltime to closest city
```{r map traveltime, warning=F, message=F}
tm_shape(grid_final, name = 'Traveltime to next city') +
tm_polygons(
'duration_h',
fill.scale = tm_scale_intervals(
values = 'hcl.heat',
breaks = c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
),
fill.title = tm_legend(title = 'Traveltime to closest city'),
lwd = 0.1,
border = 'white'
) +
grid_final |> group_by(ADM2_PCODE) |> summarise(ADM2_PCODE = first(ADM2_PCODE)) |>
tm_shape(name = 'Admin(2) boundaries') +
tm_borders(lwd = 0.5) +
tm_shape(cities, name = "Greater City") +
tm_dots()
```
### Closest city
```{r map closest city, warning=F, message=F}
tm_shape(grid_final, name = 'Closest City') +
tm_polygons(
fill = 'city_id',
fill.scale = tm_scale_discrete(values = 'brewer.pastel1'),
fill.title = tm_legend(title = 'Traveltime to closest city'),
lwd = 0.1,
border = 'white'
) +
grid_final |> group_by(ADM2_PCODE) |> summarise(ADM2_PCODE = first(ADM2_PCODE)) |>
tm_shape(name = 'Admin(2) boundaries') +
tm_borders(lwd = 0.5) +
tm_shape(cities, name = "Greater City") +
tm_dots()
```
# Output
Grid export in geopackage format.
```{r, warning=F, message=F}
grid_final |> select(-c(snapped_centroid, centroid)) |>
st_write('somalia_grid_shortest_duration.gpkg', append = F, quiet = T)
```