library(tidyverse)
library(tidygeocoder)
library(sf)
library(leaflet)
library(cptcity)
Geocoding of Diseases on the Iquitos - Nauta road, Loreto - Peru šš¦š£ļø
Requeriments
1. Reading data processed
<- read_csv('data/cases-report-geo-edited.csv') cases_reported
|>
cases_reported ::datatable(
DToptions = list(display = "compact", pageLength = 5, scrollX = TRUE)
)
2. Geocoding with the MapBox API
For use 100 000 request by month free.
Link: https://www.mapbox.com/pricing/#search
For to use MapBox API is necessary storage the personal TOKEN in the R environment. This step you can to do using the next code:
usethis::edit_r_environ()
MAPBOX_API_KEY="YourAPIKeyHere"
|>
cases_reported mutate(
country = 'PERU',
dep = 'LORETO',
prov = 'MAYNAS',
dist = 'SAN JUAN BAUTISTA',
nogeo = sprintf(
'%s,%s,%s,%s,%s,CENTRO POBLADO DE %s',
country,
dep,
prov,
dist,
ccpp_name,str_to_upper(address)),
geocode = geo(nogeo,method = "mapbox"),
lat = geocode$lat,
long = geocode$long
|>
) select(-c(geocode)) -> geocode_cases_reported
|>
geocode_cases_reported ::datatable(
DToptions = list(display = "compact", pageLength = 5, scrollX = TRUE)
)
3. Analysis of geocoding data of diseases
<- geocode_cases_reported |>
stats mutate(
id_geocode =
case_when(
is.na(lat) ~ 'No Geocoded',
TRUE ~ 'Geocoded'
)|>
) group_by(id_geocode) |>
summarise(total = n()) |>
mutate(percentage = total*100/sum(total))
|>
stats ggplot(aes(x = id_geocode, y = percentage)) +
geom_bar(stat = "identity", fill = '#086375',alpha = 0.5) +
geom_text(aes(label = round(percentage,2)),vjust= -0.5) +
theme_minimal()
4. No geocoding
<- geocode_cases_reported |>
review_data filter(is.na(lat) | is.na(lat))
|>
review_data ::datatable(
DToptions = list(display = "compact", pageLength = 5, scrollX = TRUE)
)
5. Repair address
- Replace ā#ā
- Remove ā/AAHH LAS BRISAS2ā
- Remove āN-ā
- Replace āCA.ā by CALLE
- Replace āAHā by ASENTAMIENTO HUMANO
<- geocode_cases_reported |>
review_data filter(is.na(lat) | is.na(lat)) |>
mutate(
address = gsub("#", "", address),
address = gsub("/AAHH LAS BRISAS2", "", address),
address = gsub("N-", "", address),
address = gsub("CA. ", "CALLE ", address),
address = gsub("AH", "ASENTAMIENTO HUMANO ", address),
address = gsub("/AAHH", "ASENTAMIENTO HUMANO ", address),
address = gsub("MZA.", "MZ ", address),
address = gsub("AA. HH", "ASENTAMIENTO HUMANO ", address),
address = gsub("/AASENTAMIENTO HUMANO H LAS BRISAS", " ASENTAMIENTO HUMANO LAS BRISAS", address)
)
6. Geocoding with the MapBox API again
<- review_data |>
geocode_review_data mutate(
country = 'PERU',
dep = 'LORETO',
prov = 'MAYNAS',
dist = 'SAN JUAN BAUTISTA',
nogeo = sprintf(
'%s,%s,%s,%s,%s,CENTRO POBLADO DE %s',
country,
dep,
prov,
dist,
ccpp_name,str_to_upper(address)),
geocode = geo(nogeo,method = "mapbox"),
lat = geocode$lat,
long = geocode$long
|>
) select(-c(geocode))
|>
geocode_review_data ::datatable(
DToptions = list(display = "compact", pageLength = 5, scrollX = TRUE)
)
7. Final dataset
<- geocode_cases_reported |>
geocode_cases_reported_clean drop_na()
<- bind_rows(
ddbb_geocoded
geocode_cases_reported_clean,
geocode_review_data)
<- ddbb_geocoded |>
ddbb_geocoded_sf drop_na(lat,long) |>
st_as_sf(coords = c('lat','long'),crs = 4326)
8. Geovisualization
|>
ddbb_geocoded leaflet() |>
addTiles() |>
addMarkers()
9. Export final data
write_csv(ddbb_geocoded,'output/cases_report_geocoded.csv')
write_sf(ddbb_geocoded_sf,'output/cases_report_geocoded.gpkg')