I have created a dataframe p
based on a larger dataset d
, meaning that all information in p
can be found in d
, but not the other way around.
Unfortunately, I just realized that I forgot to extract d$Gender
before initiating datamanagement of p
. Therefore, I am looking for a solution (preferable in dplyr
) to match/extract d$Gender
corresponding the same patient in p
.
Fortunately, I know the exact date of surgery and age of the patients. So, the extraction from d$Gender
to p$Gender
should be: if p$DOS = d$DOS
& p$Age = d$Age
then d$Gender
--> p$Gender
Just to clarify, the dates and age applied in this example are made up and does not correspond to any actual patients.
I have tried something like distinct(.., .keep_all = TRUE)
and the approaches described here, but without luck. I also tried ifelse()
.
p <- structure(list(DOS = c("09.01.2018", "21.07.2015", "29.07.2013",
"06.04.2017", "18.02.2013", "16.05.2001", "16.06.2014", "02.09.2014",
"14.01.2013", "18.04.2017", "15.05.2017", "13.06.2019", "02.08.2016",
"01.12.2016", "12.04.2018", "14.11.2017", "26.04.2019", "15.06.2017",
"13.11.2017", "08.05.2018", "24.05.2018", "09.03.2017", "03.11.2014",
"17.06.2013", "03.08.2015", "12.08.2016", "09.05.2019", "14.05.2019",
"12.12.2017", "20.02.2018", "16.04.2019", "16.10.2018", "16.03.2017",
"10.01.2020", "15.01.2020", "04.11.2019", "23.05.2019", "25.04.2019",
"15.11.2018", "07.03.2019", "24.01.2019", "10.01.2019", "06.09.2018",
"27.07.2017", "13.07.2017", "23.03.2017", "12.01.2017", "11.08.2016",
"05.05.2016", "05.05.2016", "11.02.2016", "03.12.2015", "14.06.2018",
"03.09.2015", "11.06.2015", "19.07.2016", "13.02.2019", "13.03.2018",
"04.08.2015", "18.09.2019", "15.08.2017", "22.12.2015", "14.05.2019",
"29.06.2017", "30.04.2019", "13.11.2018", "05.02.2019", "08.01.2019",
"23.10.2018", "11.09.2018", "04.09.2018", "28.08.2018", "22.05.2018",
"08.05.2018", "28.11.2017", "19.09.2017", "15.08.2017", "11.07.2017",
"23.05.2017", "28.03.2017", "16.02.2016", "05.01.2016", "20.10.2015",
"13.07.2015", "23.06.2015", "14.10.2014", "28.01.2014", "26.11.2013",
"13.12.2018", "10.09.2019", "19.01.2016", "30.07.2013", "18.03.2019",
"11.02.2019", "21.01.2019", "07.01.2019", "14.06.2016", "26.04.2016",
"04.04.2016", "09.02.2016"), Age = c(80, 66, 88, 65, 54, 36,
59, 65, 59, 54, 57, 70, 68, 64, 69, 33, 50, 69, 69, 68, 77, 57,
42, 69, 64, 59, 45, 44, 63, 43, 57, 80, 69, 79, 72, 61, 77, 47,
71, 65, 75, 55, 68, 61, 35, 54, 68, 53, 69, 32, 60, 72, 67, 72,
75, 44, 48, 72, 66, 29, 79, 41, 52, 55, 89, 77, 72, 60, 74, 33,
75, 82, 66, 61, 69, 62, 86, 77, 78, 61, 69, 39, 61, 69, 62, 70,
57, 42, 59, 50, 74, 76, 47, 62, 75, 61, 65, 67, 73, 60)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -100L))
And
d <- structure(list(DOS = c("16.10.2018", "16.03.2017", "10.01.2020",
"15.01.2020", "04.11.2019", "05.07.2019", "13.06.2019", "12.07.2018",
"30.05.2019", "23.05.2019", "23.05.2019", "25.04.2019", "15.11.2018",
"07.03.2019", "24.01.2019", "10.01.2019", "06.09.2018", "03.01.2019",
"27.07.2017", "13.11.2018", "13.07.2017", "23.03.2017", "12.01.2017",
"11.08.2016", "05.05.2016", "04.09.2018", "16.08.2018", "05.05.2016",
"11.02.2016", "03.12.2015", "14.06.2018", "03.09.2015", "11.06.2015",
"19.07.2016", "01.05.2018", "31.07.2019", "01.07.2019", "13.02.2019",
"13.03.2018", "04.08.2015", "19.12.2017", "18.09.2019", "15.08.2017",
"22.12.2015", "30.07.2019", "25.06.2019", "04.06.2019", "28.05.2019",
"14.05.2019", "29.06.2017", "30.04.2019", "16.04.2019", "19.03.2019",
"13.11.2018", "05.02.2019", "08.01.2019", "23.10.2018", "11.09.2018",
"04.09.2018", "28.08.2018", "22.05.2018", "08.05.2018", "28.11.2017",
"19.09.2017", "15.08.2017", "11.07.2017", "23.05.2017", "28.03.2017",
"16.02.2016", "05.01.2016", "20.10.2015", "13.07.2015", "23.06.2015",
"14.10.2014", "28.01.2014", "26.11.2013", "13.12.2018", "10.09.2019",
"29.07.2019", "06.05.2019", "19.01.2016", "30.07.2013", "01.07.2019",
"18.03.2019", "11.02.2019", "21.01.2019", "07.01.2019", "14.06.2016",
"26.04.2016", "04.04.2016", "09.02.2016", "10.11.2015", "01.06.2015",
"05.08.2014", "17.08.2015", "14.08.2017", "10.04.2017", "30.03.2015",
"15.12.2014", "24.11.2014"), Age = c(80, 69, 79, 72, 61, 51,
70, 82, 64, 77, 50, 47, 71, 65, 75, 55, 68, 80, 61, 60, 35, 54,
68, 53, 69, 51, 72, 32, 60, 72, 67, 72, 75, 44, 75, 43, 36, 48,
72, 66, 61, 29, 79, 41, 66, 67, 68, 79, 52, 55, 89, 62, 55, 77,
72, 60, 74, 33, 75, 82, 66, 61, 69, 62, 86, 77, 78, 61, 69, 39,
61, 69, 62, 70, 57, 42, 59, 50, 77, 68, 74, 76, 68, 47, 62, 75,
61, 65, 67, 73, 60, 78, 72, 51, 73, 70, 69, 83, 20, 69), Gender = c("M",
"F", "F", "F", "M", "F", "M", "F", "F", "F", "F", "F", "F", "F",
"F", "F", "M", "F", "F", "M", "F", "F", "M", "F", "M", "F", "F",
"F", "F", "F", "M", "M", "F", "F", "M", "M", "F", "F", "M", "F",
"F", "F", "M", "M", "M", "M", "F", "M", "F", "F", "F", "F", "F",
"M", "F", "F", "F", "M", "F", "M", "M", "F", "F", "F", "F", "F",
"M", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "M",
"M", "F", "M", "F", "F", "M", "F", "F", "F", "F", "F", "F", "F",
"F", "F", "F", "F", "M", "M", "M", "F")), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -100L))