I'd like to add a new variable to a dataframe for plotting labels, as seen in the top voted answer here
Here's the data:
small <- structure(list(Site = structure(c(1L, 20L, 20L, 6L, 18L, 7L,
8L, 4L, 6L, 20L, 15L, 8L, 14L, 3L, 20L, 4L, 20L, 1L, 15L, 18L,
15L, 1L, 15L, 11L, 20L, 20L, 16L, 4L, 14L, 3L, 2L, 4L, 4L, 11L,
14L, 4L, 15L, 20L, 20L, 18L, 15L, 14L, 4L, 20L, 6L, 4L, 4L, 15L,
7L, 20L, 2L, 5L, 6L, 3L, 7L, 14L, 14L, 5L, 4L, 14L, 20L, 12L,
6L, 5L, 20L, 4L, 8L, 20L, 4L, 4L, 15L, 6L, 5L, 20L, 14L, 13L,
8L, 20L, 14L, 20L, 8L, 3L, 18L, 6L, 19L, 19L, 20L, 19L, 18L,
4L, 1L, 8L, 4L, 1L, 6L, 4L, 7L, 6L, 14L, 15L), .Label = c("1305",
"1307", "1308", "1309", "1312", "1313", "1314", "1316", "1454",
"1455", "1456", "1457", "1458", "1459", "1461", "1462", "51242",
"735", "739", "cc82"), class = "factor"), Sample = c(807001,
549001, 1776001, 708001, 1428001, 144001, 273001, 2031001, 186001,
4449001, 495001, 1, 75001, 1395001, 801001, 393001, 48001, 186001,
633001, 252001, 1047001, 780001, 1149001, 123001, 1803001, 1920001,
93001, 1539001, 1278001, 252001, 129001, 2415001, 4839001, 222001,
798001, 2709001, 1350001, 2763001, 2367001, 852001, 1239001,
1344001, 2364001, 3141001, 1383001, 4284001, 213001, 696001,
198001, 4521001, 156001, 822001, 1056001, 753001, 48001, 678001,
132001, 189001, 2958001, 951001, 351001, 183001, 726001, 939001,
942001, 3144001, 393001, 2559001, 1230001, 3507001, 825001, 1314001,
525001, 2643001, 1080001, 48001, 657001, 129001, 219001, 2853001,
27001, 1002001, 372001, 1197001, 183001, 546001, 3189001, 1191001,
225001, 4173001, 66001, 687001, 882001, 453001, 771001, 1749001,
39001, 579001, 837001, 2130001), Species = c(1034.99999063637,
1034.38745958467, 1034.93602944106, 1034.86617039632, 1034.99994688683,
1031.33494747455, 1034.01286119925, 1034.93820951036, 1034.04207802324,
1034.99999978378, 1034.7195310584, 1.0000000000006, 1032.22721814454,
1034.99998599318, 1034.61810976437, 1033.93049848926, 1029.76888222768,
1034.4773467232, 1034.8464918186, 1033.88103519209, 1034.98062874486,
1034.99997102765, 1034.9894164954, 1031.22209052865, 1034.9393924588,
1034.95228329769, 1029.57766168241, 1034.84617251106, 1034.99716319884,
1034.01759652049, 1034.12421865692, 1034.97165766244, 1034.99999931756,
1032.5264160899, 1034.9302506075, 1034.9852789914, 1034.99732449456,
1034.99393176586, 1034.98256074378, 1034.82915901302, 1034.99407894687,
1034.99848385292, 1034.96842702321, 1034.9982024792, 1034.99990895745,
1034.99994016069, 1033.46577496826, 1034.8847185144, 1032.07439991374,
1034.99999994015, 1034.37061686683, 1033.9995317372, 1034.98014310935,
1034.7765851292, 1023.56493233315, 1034.87292826712, 1033.5159578377,
1033.67572213954, 1034.99198243374, 1034.97016563294, 1034.08306140893,
1024.75667862308, 1034.87575799201, 1033.99992196217, 1034.70367833352,
1034.99510596938, 1034.31114418187, 1034.9893038116, 1034.73597419662,
1034.99835620314, 1034.93758219921, 1034.99920511546, 1033.98476342599,
1034.99147574006, 1034.98682314168, 990.598200299325, 1034.72635751567,
1033.22853472628, 1034.07153177872, 1034.99535891042, 1020.83157812414,
1034.91858260823, 1034.25487458056, 1034.99478390852, 1033.10757669472,
1033.94643834015, 1034.99848918266, 1034, 1033.75393660622, 1034.99989098177,
1032.40178509791, 1034.75797238777, 1034.52419010051, 1034.97051191563,
1034.8976530452, 1034.89479904612, 1019.98410325148, 1034.78158579658,
1034.94325878046, 1035)), class = "data.frame", row.names = c(NA,
-100L))
The new
df should contain a 4th column called lab
, with the Site
value in it if that row had the highest Sample
value, otherwise it should be empty:
new = small %>%
mutate(lab = if_else(Sample == max(Sample), as.character(Site), NA_character_))
Instead, only the highest Sample for the 1309
site value has a lab
entry. There should be one lab
entry for every highest Site
. The Site
variable is a factor.
This image is an example that works from the link above, where there is one label entry for each state with the highest year:
This is the toy data from that question where grouping was not required:
temp.dat <- structure(list(Year = c("2003", "2004", "2005", "2006", "2007",
"2008", "2009", "2010", "2011", "2012", "2013", "2014", "2003",
"2004", "2005", "2006", "2007", "2008", "2009", "2010", "2011",
"2012", "2013", "2014", "2003", "2004", "2005", "2006", "2007",
"2008", "2009", "2010", "2011", "2012", "2013", "2014", "2003",
"2004", "2005", "2006", "2007", "2008", "2009", "2010", "2011",
"2012", "2013", "2014"), State = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("VIC",
"NSW", "QLD", "WA"), class = "factor"), Capex = c(5.35641472365348,
5.76523240652641, 5.24727577535625, 5.57988239709746, 5.14246402568366,
4.96786288162828, 5.493190785287, 6.08500616799372, 6.5092228474591,
7.03813541623157, 8.34736513875897, 9.04992300432169, 7.15830329914056,
7.21247045701994, 7.81373928617117, 7.76610217197542, 7.9744994967006,
7.93734452080786, 8.29289899132255, 7.85222269563982, 8.12683746325074,
8.61903784301649, 9.7904327253813, 9.75021175267288, 8.2950673974226,
6.6272705639724, 6.50170524635367, 6.15609626379471, 6.43799637295979,
6.9869551384028, 8.36305663640294, 8.31382617231745, 8.65409824343971,
9.70529678167458, 11.3102788081848, 11.8696420977237, 6.77937303542605,
5.51242844820827, 5.35789621712839, 4.38699327451101, 4.4925792218211,
4.29934654081527, 4.54639175257732, 4.70040615159951, 5.04056109514957,
5.49921208937735, 5.96590909090909, 6.18700407463007)), class = "data.frame", row.names = c(NA,
-48L), .Names = c("Year", "State", "Capex"))
And running this correctly assigns the state to the max(year) without grouping as seen in the screenshot:
temp.dat %>%
mutate(label = if_else(Year == max(Year), as.character(State), NA_character_))