I am trying to create a dataframe where I transform and select variables based on the value rows take in two different columns. Let me explain.
This is what my data frame currently looks like:
ONSConstID
is an id of each constituency. Each row is a different constituency.
All columns ending in vote15
are vote counts for each party in a constituency.
All columns ending in sex15
are the candidates' gender for each party.
The other columns aren't really relevant for what I am trying to do.
dput(c[1:40,])
structure(list(ONSConstID = structure(c("W07000049", "W07000058",
"S14000001", "S14000002", "S14000058", "S14000003", "E14000530",
"E14000531", "E14000532", "W07000043", "E14000533", "S14000004",
"W07000057", "S14000005", "E14000534", "E14000535", "E14000536",
"E14000537", "E14000538", "S14000006", "S14000010", "S14000048",
"E14000539", "S14000007", "E14000540", "E14000541", "E14000542",
"E14000543", "E14000544", "E14000933", "E14000545", "E14000546",
"E14000547", "E14000548", "E14000549", "E14000550", "E14000551",
"E14000552", "E14000813", "E14000841"), label = "ONS Constituency ID", format.stata = "%9s"),
ConPPCsex19 = structure(c(1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,
1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0,
1, 0, 1, 0, 1, 1, 0, 0, 1, 0), label = "Conservative 2019 candidate sex", format.stata = "%10.0g", labels = c(Male = 0,
Female = 1, `Non-binary` = 2), class = c("haven_labelled",
"vctrs_vctr", "double")), LabPPCsex19 = structure(c(0, 1,
0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1,
0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0), label = "Labour 2019 candidate sex", format.stata = "%10.0g", labels = c(Male = 0,
Female = 1, `Non-binary` = 2), class = c("haven_labelled",
"vctrs_vctr", "double")), LDPPCsex19 = structure(c(1, 0,
1, 0, 0, 0, 0, 0, 1, 1, 1, 0, NA, 0, 1, 1, 0, 0, 0, 1, 1,
1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, NA, 1, 0, 1, 0), label = " 2019 candidate sex", format.stata = "%10.0g", labels = c(Male = 0,
Female = 1, `Non-binary` = 2), class = c("haven_labelled",
"vctrs_vctr", "double")), SNPPPCsex19 = structure(c(NA, NA,
1, 0, 0, 0, NA, NA, NA, NA, NA, 0, NA, 0, NA, NA, NA, NA,
NA, 0, 1, 1, NA, 0, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA), label = "Scottish National Party (SNP) 2019 candidate sex", format.stata = "%10.0g", labels = c(Male = 0,
Female = 1, `Non-binary` = 2), class = c("haven_labelled",
"vctrs_vctr", "double")), PCPPCsex19 = structure(c(0, 1,
NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, 0, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA), label = " 2019 candidate sex", format.stata = "%10.0g", labels = c(Male = 0,
Female = 1, `Non-binary` = 2), class = c("haven_labelled",
"vctrs_vctr", "double")), UKIPPPCsex19 = structure(c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), label = "United Kingdom Independence Party (UKIP) 2019 candidate sex", format.stata = "%10.0g", labels = c(Male = 0,
Female = 1, `Non-binary` = 2), class = c("haven_labelled",
"vctrs_vctr", "double")), GreenPPCsex19 = structure(c(1,
NA, 0, NA, NA, 1, 1, 0, 1, NA, 0, NA, NA, NA, 1, 1, 1, 0,
1, NA, NA, 0, 0, NA, 1, 0, 0, 0, 0, NA, 0, NA, NA, 0, 1,
1, 1, 0, 0, 1), label = "Green 2019 candidate sex", format.stata = "%10.0g", labels = c(Male = 0,
Female = 1, `Non-binary` = 2), class = c("haven_labelled",
"vctrs_vctr", "double")), BrexitPPCsex19 = structure(c(1,
NA, 0, NA, NA, NA, NA, NA, NA, 0, NA, NA, 0, NA, NA, 0, NA,
0, NA, NA, NA, NA, NA, NA, 1, 1, 0, 0, NA, NA, NA, 1, 0,
0, 0, NA, NA, 0, NA, NA), label = "Brexit 2019 candidate sex", format.stata = "%10.0g", labels = c(Male = 0,
Female = 1, `Non-binary` = 2), class = c("haven_labelled",
"vctrs_vctr", "double")), ConPPCsex17 = structure(c(1, 0,
1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1,
0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0), label = "Conservative 2017 candidate sex", format.stata = "%12.0g", labels = c(Male = 0,
Female = 1, `Non-binary` = 2), class = c("haven_labelled",
"vctrs_vctr", "double")), LabPPCsex17 = structure(c(0, 1,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0,
0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0), label = "Labour 2017 candidate sex", format.stata = "%12.0g", labels = c(Male = 0,
Female = 1, `Non-binary` = 2), class = c("haven_labelled",
"vctrs_vctr", "double")), LDPPCsex17 = structure(c(0, 1,
1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0,
0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0), label = "Liberal Democrat 2017 candidate sex", format.stata = "%12.0g", labels = c(Male = 0,
Female = 1, `Non-binary` = 2), class = c("haven_labelled",
"vctrs_vctr", "double")), SNPPPCsex17 = structure(c(NA, NA,
1, 0, 0, 0, NA, NA, NA, NA, NA, 0, NA, 0, NA, NA, NA, NA,
NA, 1, 1, 1, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA), label = "SNP 2017 candidate sex", format.stata = "%12.0g", labels = c(Male = 0,
Female = 1, `Non-binary` = 2), class = c("haven_labelled",
"vctrs_vctr", "double")), PCPPCsex17 = structure(c(0, 0,
NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, 0, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA), label = "Plaid Cymru 2017 candidate sex", format.stata = "%12.0g", labels = c(Male = 0,
Female = 1, `Non-binary` = 2), class = c("haven_labelled",
"vctrs_vctr", "double")), UKIPPPCsex17 = structure(c(1, NA,
NA, NA, NA, NA, 0, NA, NA, 0, NA, NA, NA, NA, 0, 0, 0, 0,
0, NA, NA, NA, 0, NA, 0, 0, 0, 0, 1, 0, 0, NA, NA, NA, 0,
0, NA, NA, NA, 0), label = "UKIP 2017 candidate sex", format.stata = "%12.0g", labels = c(Male = 0,
Female = 1, `Non-binary` = 2), class = c("haven_labelled",
"vctrs_vctr", "double")), GreenPPCsex17 = structure(c(NA,
NA, NA, NA, NA, NA, 1, NA, 1, NA, 0, NA, NA, NA, 1, 0, 1,
0, 1, NA, NA, NA, 0, NA, 1, 0, NA, 0, NA, 0, 0, NA, 1, 0,
1, 0, 1, 1, 0, 1), label = "Green Party 2017 candidate sex", format.stata = "%12.0g", labels = c(Male = 0,
Female = 1, `Non-binary` = 2), class = c("haven_labelled",
"vctrs_vctr", "double")), ConPPCsex15 = structure(c(0, 0,
0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0,
0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0), label = "Conservative 2015 candidate sex", format.stata = "%12.0g", labels = c(Male = 0,
Female = 1, `Non-binary` = 2), class = c("haven_labelled",
"vctrs_vctr", "double")), LabPPCsex15 = structure(c(0, 1,
0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0,
1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0), label = "Labour 2015 candidate sex", format.stata = "%12.0g", labels = c(Male = 0,
Female = 1, `Non-binary` = 2), class = c("haven_labelled",
"vctrs_vctr", "double")), LDPPCsex15 = structure(c(1, 0,
0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0,
1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0), label = "Liberal Democrat 2015 candidate sex", format.stata = "%12.0g", labels = c(Male = 0,
Female = 1, `Non-binary` = 2), class = c("haven_labelled",
"vctrs_vctr", "double")), SNPPPCsex15 = structure(c(NA, NA,
1, 0, 0, 0, NA, NA, NA, NA, NA, 0, NA, 0, NA, NA, NA, NA,
NA, 1, 1, 1, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA), label = "SNP 2015 candidate sex", format.stata = "%12.0g", labels = c(Male = 0,
Female = 1, `Non-binary` = 2), class = c("haven_labelled",
"vctrs_vctr", "double")), PCPPCsex15 = structure(c(0, 0,
NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, 0, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA), label = "Plaid Cymru 2015 candidate sex", format.stata = "%12.0g", labels = c(Male = 0,
Female = 1, `Non-binary` = 2), class = c("haven_labelled",
"vctrs_vctr", "double")), UKIPPPCsex15 = structure(c(0, 0,
NA, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, NA,
1, 0, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1), label = "UKIP 2015 candidate sex", format.stata = "%12.0g", labels = c(Male = 0,
Female = 1, `Non-binary` = 2), class = c("haven_labelled",
"vctrs_vctr", "double")), GreenPPCsex15 = structure(c(0,
1, NA, 0, 0, NA, 0, 0, 0, 0, 0, 0, NA, NA, 1, NA, 1, 1, 0,
NA, 1, NA, 0, NA, 0, 0, NA, 0, NA, NA, NA, 0, 0, 0, 0, 0,
1, 0, 0, 0), label = "Green Party 2015 candidate sex", format.stata = "%12.0g", labels = c(Male = 0,
Female = 1, `Non-binary` = 2), class = c("haven_labelled",
"vctrs_vctr", "double")), ConVote15 = structure(c(3742, 12513,
5304, 11087, 15916, 3389, 23369, 20558, 26771, 13197, 20106,
12900, 3521, 7733, 34331, 10628, 30094, 8610, 28083, 10355,
8803, 7968, 30749, 13148, 7019, 5485, 5622, 17525, 22668,
19788, 25769, 15122, 17833, 15769, 26730, 33621, 27955, 19625,
32544, 34891), label = "2015 Conservative number of votes", format.stata = "%12.0g"),
LabVote15 = structure(c(15416, 8514, 11397, 12991, 2487,
15108, 8468, 8835, 13481, 16540, 15901, 3919, 8122, 5394,
6324, 19448, 10580, 19366, 8391, 14227, 13410, 15068, 12354,
2647, 24826, 20376, 21079, 18320, 10186, 11493, 14706, 23965,
6216, 21826, 18792, 6074, 9484, 18528, 9217, 9247), label = "2015 Labour number of votes", format.stata = "%12.0g"),
LDVote15 = structure(c(1397, 1391, 2050, 2252, 11812, 678,
4076, 1330, 4235, 1733, 1360, 1216, 718, 14486, 4062, 7030,
3433, 943, 5885, 855, 917, 896, 3440, 2347, 562, 770, 1217,
1169, 1636, 1356, 3919, 1331, 14000, 2396, 2241, 3927, 3378,
1958, 4193, 3418), label = "2015 Liberal Democrat number of votes", format.stata = "%12.0g"),
SNPVote15 = structure(c(NA, NA, 24793, 20221, 22949, 23887,
NA, NA, NA, NA, NA, 24130, NA, 22959, NA, NA, NA, NA, NA,
25492, 26999, 28641, NA, 27487, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA), label = "2015 Scottish National Party (SNP) number of votes", format.stata = "%12.0g"),
PCVote15 = structure(c(3663, 3536, NA, NA, NA, NA, NA, NA,
NA, 1608, NA, NA, 11790, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA), label = "2015 Plaid Cymru number of votes", format.stata = "%12.0g"),
UKIPVote15 = structure(c(4971, 3467, NA, 897, 1006, 1088,
8253, 7751, 4047, 7260, 7263, 1355, 2277, 1311, 8154, 10150,
10798, 8468, 10925, 1280, NA, 1296, 8050, NA, 9554, 7941,
9045, 5070, 8538, 12097, 8290, 7865, 2922, 9080, 1586, 7310,
6108, 4434, 8966, 8579), label = "2015 United Kingdom Independence Party (UKIP) number of votes", format.stata = "%12.0g"),
GreenVote15 = structure(c(711, 727, NA, 964, 885, NA, 2025,
826, 1983, 976, 1087, 965, NA, NA, 3606, NA, 2467, 1531,
2135, NA, 645, NA, 2686, NA, 897, 938, NA, 1061, NA, NA,
NA, 1006, 5634, 1232, 1682, 2231, 1878, 1412, 2462, 2537), label = "2015 Green Party number of votes", format.stata = "%12.0g"),
BNPVote15 = structure(c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), label = "2015 British National Party (BNP) number of votes", format.stata = "%12.0g"),
Winner15 = structure(c(2, 1, 4, 4, 4, 4, 1, 1, 1, 2, 1, 4,
5, 4, 1, 2, 1, 2, 1, 4, 4, 4, 1, 4, 2, 2, 2, 2, 1, 1, 1,
2, 1, 2, 1, 1, 1, 1, 1, 1), label = "2015 Winning party", format.stata = "%12.0g", labels = c(Conservative = 1,
Labour = 2, `Liberal Democrat` = 3, `Scottish National Party` = 4,
`Plaid Cymru` = 5, UKIP = 6, Green = 7, Speaker. = 9), class = c("haven_labelled",
"vctrs_vctr", "double")), Majority15 = structure(c(33.134536687498,
13.2645614966167, 30.4898033503278, 14.8915573314659, 12.7418653525618,
19.8234204940613, 32.2595310774826, 29.6807352457149, 26.3079755329889,
8.09168804763518, 9.1978913751996, 25.2444644262111, 13.6676975816969,
16.3309754640248, 46.3498415284098, 18.6040625197747, 33.6331311441121,
27.637596998818, 30.9605009112398, 21.5767396425904, 26.7636979556466,
25.1963095657985, 31.7111432905806, 31.4251901203182, 35.4824469691689,
34.0125820568928, 31.2433470934912, 1.8370883882149, 29.009017384029,
16.8688175816463, 20.8436958323913, 17.9411227657287, 8.12644433608243,
11.9990491095307, 15.5552507299485, 49.4911874800143, 37.8480831096449,
2.38033242199366, 40.1774026868756, 43.7073902372512), label = "2015 Majority", format.stata = "%12.0g")), row.names = c(NA,
-40L), class = c("tbl_df", "tbl", "data.frame"))
I am trying to get 2 things I am having huge issues with:
- is to get only the two most voted candidates for each row (
ONSConstID
), meaning those with higher values in the variables ending withVote15
(UKIPVote15
,LabVote15
, etc.) - Is to keep those rows only if one of those two with most votes has
sex15
== 1, and to have the same variables for candidate 1 (most voted) and 2 (second most voted).
Ideally, the final data frame would look something like this. I did it for the 3 first rows:
ONSConstID <- c( "W07000058", "S14000001")
votes1 <- c(12513, 20221)
party1 <- c( "Con", "SNP")
sex1 <- c(0, 1)
votes2 <- c(8514, 11397)
party2 <- c("Lab", "Lab" )
sex2 <- c(1, 0)
idealdf <- data.frame(c(ONSConstID, votes1, party1, sex1,
votes2, party2, sex2))
idealdf
ONSConstID votes1 party1 sex1 votes2 party2 sex2
1 W07000058 12513 Con 0 8514 Lab 1
2 S14000001 20221 SNP 1 11397 Lab 0
I excluded the 1st row "W07000049"
because no candidate there had Sex15
== 1, and then I limited the information for just the 2 most voted candidates.
I have tried many different things with pivoting data back and forth from wide to longer in order to order candidates by vote and select just the 2 higher ones, but I am not able to get to the idealdf
I am showing here.
Any idea? Thank you so much in advance!