I have to rewrite an old do file written in Stata into R. I am stuck with a reshape command, which I would appreciate some help with.
The data looks like this:
dat <- structure(list(country_num = c(3, 3, 3, 3, 3, 3, 3, 3, 3, 15,
15, 15, 15, 15, 15, 15), country = c("Germany", "Germany", "Germany",
"Germany", "Germany", "Germany", "Germany", "Germany", "Germany",
"United Kingdom", "United Kingdom", "United Kingdom", "United Kingdom",
"United Kingdom", "United Kingdom", "United Kingdom"), parlgov_id = c(558,
808, 543, 791, 772, 1180, 1537, 865, 2253, 1556, 773, 659, 1272,
1284, 467, 1250), persparty_id = c(1, 2, 3, 4, 5, 6, 7, 8, 9,
1, 2, 3, 4, 5, 6, 7), parlgovname = c("Sozialdemokratische Partei Deutschlands",
"Christlich Demokratische Union", "Freie Demokratische Partei",
"Die Linke / PDS", "Bündnis 90 / Die Grünen", "Christlich Soziale Union",
"Nationaldemokratische Partei Deutschlands", "Piratenpartei Deutschland",
"Alternative für Deutschland", "Labour Party", "Conservatives",
"Liberals", "United Kingdom Independence Party", "Scottish National Party <U+0096> Pàrtaidh Nàiseanta na h-Alba",
"Green Party", "British National Party"), abrev = c("SPD", "CDU",
"FDP", "Li/PDS", "B90/Gru", "CSU", "NPD", "Pi", "AfD", "Lab",
"Con", "Lib", "UKIP", "SNP", "GP", "BNP"), el_date1 = structure(c(1127001600,
1127001600, 1127001600, 1127001600, 1127001600, 1127001600, 1127001600,
NA, NA, 1115251200, 1115251200, 1115251200, 1115251200, 1115251200,
1115251200, 1115251200), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
pname1 = c("Social Democratic Party of Germany", "Christian Democratic Union",
"Free Democratic Party", "PDS", "Alliance 90 / Greens", "Christian Social Union",
"National Democratic Party", NA, NA, "Labour", "Conservatives",
"Liberals", "United Kingdom Independence Party", "Scottish National Party",
"Green Party", "British National Party"), vote1 = c(34.2000007629395,
27.7999992370605, 9.80000019073486, 8.69999980926514, 8.10000038146973,
7.40000009536743, 1.60000002384186, NA, NA, 35.2000007629395,
32.4000015258789, 22, 2.20000004768372, 1.5, 1, 0.7), not_run2 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
), new2 = c(NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA, NA,
NA, NA, NA, NA), disbanded2 = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), splinter2 = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), merged2 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA), joint2 = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), merged_new_name2 = c(NA, NA, NA, 1, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA), joint_new_name2 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), not_coded2 = c(NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), note2 = c(NA,
NA, NA, "PDS and part of the SPD", NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA), el_date2 = structure(c(1254009600,
1254009600, 1254009600, 1254009600, 1254009600, 1254009600,
1254009600, 1254009600, NA, 1273104000, 1273104000, 1273104000,
1273104000, 1273104000, 1273104000, 1273104000), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), pname2 = c("Social Democratic Party of Germany",
"Christian Democratic Union", "Free Democratic Party", "Die Linke",
"Alliance 90 / Greens", "Christian Social Union", "National Democratic Party",
"German Pirate Party", NA, "Labour", "Conservatives", "Liberals",
"United Kingdom Independence Party", "Scottish National Party",
"Green Party", "British National Party"), vote2 = c(23, 27.2999992370605,
14.6000003814697, 11.8999996185303, 10.6999998092651, 6.5,
1.5, 2, NA, 29, 36.0999984741211, 23, 3.09999990463257, 1.70000004768372,
1, 1.89999997615814)), row.names = c(NA, -16L), class = c("tbl_df",
"tbl", "data.frame"))
Sorry for the long code, I know it looks ugly, and that's sort of the point. I have to reshape this into something like:
dat <- structure(list(country_num = c(3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15), country = c("Germany", "Germany", "Germany",
"Germany", "Germany", "Germany", "Germany", "Germany", "Germany",
"Germany", "Germany", "Germany", "Germany", "Germany", "Germany",
"Germany", "Germany", "Germany", "United Kingdom", "United Kingdom",
"United Kingdom", "United Kingdom", "United Kingdom", "United Kingdom",
"United Kingdom", "United Kingdom", "United Kingdom", "United Kingdom",
"United Kingdom", "United Kingdom", "United Kingdom", "United Kingdom"
), persparty_id = c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7,
8, 8, 9, 9, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7), parlgov_id = c(558,
558, 808, 808, 543, 543, 791, 791, 772, 772, 1180, 1180, 1537,
1537, 865, 865, 2253, 2253, 1556, 1556, 773, 773, 659, 659, 1272,
1272, 1284, 1284, 467, 467, 1250, 1250), parlgovname = c("Sozialdemokratische Partei Deutschlands",
"Sozialdemokratische Partei Deutschlands", "Christlich Demokratische Union",
"Christlich Demokratische Union", "Freie Demokratische Partei",
"Freie Demokratische Partei", "Die Linke / PDS", "Die Linke / PDS",
"Bündnis 90 / Die Grünen", "Bündnis 90 / Die Grünen", "Christlich Soziale Union",
"Christlich Soziale Union", "Nationaldemokratische Partei Deutschlands",
"Nationaldemokratische Partei Deutschlands", "Piratenpartei Deutschland",
"Piratenpartei Deutschland", "Alternative für Deutschland", "Alternative für Deutschland",
"Labour Party", "Labour Party", "Conservatives", "Conservatives",
"Liberals", "Liberals", "United Kingdom Independence Party",
"United Kingdom Independence Party", "Scottish National Party <U+0096> Pàrtaidh Nàiseanta na h-Alba",
"Scottish National Party <U+0096> Pàrtaidh Nàiseanta na h-Alba",
"Green Party", "Green Party", "British National Party", "British National Party"
), abrev = c("SPD", "SPD", "CDU", "CDU", "FDP", "FDP", "Li/PDS",
"Li/PDS", "B90/Gru", "B90/Gru", "CSU", "CSU", "NPD", "NPD", "Pi",
"Pi", "AfD", "AfD", "Lab", "Lab", "Con", "Con", "Lib", "Lib",
"UKIP", "UKIP", "SNP", "SNP", "GP", "GP", "BNP", "BNP"), long_id = c(1,
2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2,
1, 2, 1, 2, 1, 2, 1, 2, 1, 2), not_run = c(NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), new = c(NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), disbanded = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
splinter = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA), merged = c(NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), joint = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA), merged_new_name = c(NA, NA, NA, NA, NA, NA, NA, 1, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA), joint_new_name = c(NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
), not_coded = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA), note = c(NA, NA, NA, NA, NA,
NA, NA, "PDS and part of the SPD", NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA), el_date = structure(c(1127001600, 1254009600,
1127001600, 1254009600, 1127001600, 1254009600, 1127001600,
1254009600, 1127001600, 1254009600, 1127001600, 1254009600,
1127001600, 1254009600, NA, 1254009600, NA, NA, 1115251200,
1273104000, 1115251200, 1273104000, 1115251200, 1273104000,
1115251200, 1273104000, 1115251200, 1273104000, 1115251200,
1273104000, 1115251200, 1273104000), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), pname = c("Social Democratic Party of Germany",
"Social Democratic Party of Germany", "Christian Democratic Union",
"Christian Democratic Union", "Free Democratic Party", "Free Democratic Party",
"PDS", "Die Linke", "Alliance 90 / Greens", "Alliance 90 / Greens",
"Christian Social Union", "Christian Social Union", "National Democratic Party",
"National Democratic Party", NA, "German Pirate Party", NA,
NA, "Labour", "Labour", "Conservatives", "Conservatives",
"Liberals", "Liberals", "United Kingdom Independence Party",
"United Kingdom Independence Party", "Scottish National Party",
"Scottish National Party", "Green Party", "Green Party",
"British National Party", "British National Party"), vote = c(34.2000007629395,
23, 27.7999992370605, 27.2999992370605, 9.80000019073486,
14.6000003814697, 8.69999980926514, 11.8999996185303, 8.10000038146973,
10.6999998092651, 7.40000009536743, 6.5, 1.60000002384186,
1.5, NA, 2, NA, NA, 35.2000007629395, 29, 32.4000015258789,
36.0999984741211, 22, 23, 2.20000004768372, 3.09999990463257,
1.5, 1.70000004768372, 1, 1, 0.7, 1.89999997615814)), row.names = c(NA,
-32L), class = c("tbl_df", "tbl", "data.frame"))
In stata the code would be:
reshape long el_date pname vote not_run new disbanded splinter merged joint merged_new_name joint_new_name not_coded note, i(country_num country persparty_id parlgov_id parlgovname abrev) j(long_id)
In R I have tried something along the lines of:
cols_to_reshape <- colnames(dat)[7:74]
long <- dat %>%
gather(variable, value, cols_to_reshape)
I also tried something like:
gather(dfwide, variable, value, starts_with("stub"))
based on this suggestion, but apparently starts_with
does not work with vectors, so then I was a bit stuck. I would appreciate any help!