Literal, with inference and caveats:
library(dplyr)
library(tidyr) # pivot_*, complete, fill
# library(readr)
# library(readxl)
ddd_dataset <- readxl::read_excel("ddd_dataset.xlsx")
itu_emi_countries <- readr::read_csv("itu-emi-countries.csv") %>%
rename(Country = `ITU Name`)
new_data <- ddd_dataset %>%
filter(`Indicator name` == "Population covered by at least a 4G mobile network (%)") %>%
mutate(Value = suppressWarnings(as.numeric(Value))) %>%
pivot_wider(Country, names_from = Year, values_from = Value) %>%
# we cannot impute before here, since some countries do not have all years, but now they will
pivot_longer(-Country, names_to = "Year", values_to = "Value") %>%
arrange(Country, Year) %>%
group_by(Country) %>%
fill(Value, .direction = "updown") %>%
pivot_wider(Country, names_from = Year, values_from = Value)
new_long <- left_join(new_data, itu_emi_countries, by = "Country") %>%
# inferring that you want to keep names for countries in new_data not present in itu
mutate(Country = coalesce(`EMI Name`, Country)) %>%
# inferring you want all but `EMI Name`, not just hard-coding 1:10
select(-`EMI Name`) %>%
pivot_longer(-Country, names_to = "year", values_to = "x") %>%
mutate(year = as.integer(year))
new_data
# # A tibble: 196 x 10
# Country `2012` `2013` `2014` `2015` `2016` `2017` `2018` `2019` `2020`
# <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 Afghanistan 0 0 0 0 0 4 7 22 26
# 2 Albania 0 0 0 35 80.2 85.3 85.5 95 98.4
# 3 Algeria 0 0 0 0 3.62 30.5 52.8 53.6 76.2
# 4 Andorra 50 50 50 50 50 85 85 85 85
# 5 Angola 7 7 7 7 8 8 8 18 30
# 6 Antigua and Barbuda 65 78.6 80 98 99 99 99 99 99
# 7 Argentina 0 0 0 65 85 85 90.8 91.2 97.7
# 8 Armenia 17.5 44 46 46.5 52.5 90.0 99.1 99.3 100
# 9 Australia 52.2 85 95 94 98 99 99.2 99.4 99.5
# 10 Austria 31.6 58.4 85 98 98 98 98 98 98
# # ... with 186 more rows
new_long
# # A tibble: 1,764 x 3
# Country year x
# <chr> <int> <dbl>
# 1 Afghanistan 2012 0
# 2 Afghanistan 2013 0
# 3 Afghanistan 2014 0
# 4 Afghanistan 2015 0
# 5 Afghanistan 2016 0
# 6 Afghanistan 2017 4
# 7 Afghanistan 2018 7
# 8 Afghanistan 2019 22
# 9 Afghanistan 2020 26
# 10 Albania 2012 0
# # ... with 1,754 more rows
But it seems unnecessary and inefficient to pivot back and forth when you ultimately want it in long format in the end. One-step:
new_long2 <- ddd_dataset %>%
filter(`Indicator name` == "Population covered by at least a 4G mobile network (%)") %>%
left_join(itu_emi_countries, by = "Country") %>%
mutate(
Country = coalesce(`EMI Name`, Country), # some `EMI Name` are missing
Value = suppressWarnings(as.numeric(Value)) # "NULL" -> NA
) %>%
complete(Country, Year) %>%
arrange(Year) %>%
group_by(Country) %>%
fill(Value, .direction = "updown") %>%
ungroup() %>%
select(Country, year = Year, x = Value)
(The only difference in the data, other than order, is that Year
is a numeric in this last block and is integer
above. This can easily be remedied, over to you.)