I need to join multiple data frames, but given that the experiment ran online and participants were often sloppy when entering their ID, I added redundancy. The also had to add letters of their parents name and their zip code. I checked manually (a bit) and there's a good amount of errors. Now I need to merge by multiple columns instead of just the participant ID.
I figured that using the fuzzyjoin package makes most sense, but I'm not sure how to go about merging multiple data frames by multiple columns each? Should I just fuzzy_right_join one data frame at a time? I have 6 in total.
Many thanks!
Here's a snippet of a few to give you an idea:
structure(list(participant = c("107", "110", "111", "116", "140",
"141"), Vorname_Mutter_2_Buchstaben = c("th", "ro", "mo", "es",
"br", "gl"), Vorname_Vater_2_Buchstaben = c("al", "ha", "wa",
"th", "he", "re"), PLZ_letzte_2_Ziffern = c(28L, 4L, 23L, 10L,
15L, 90L), date = structure(c(1587307867.619, 1586435099.121,
1586424077.282, 1587733915.271, 1586794445.732, 1586896454.853
), tzone = "UTC", class = c("POSIXct", "POSIXt")), mean_RT = c(0.658042654028436,
0.612637426900585, 0.721700276752767, 0.532778303249097, 0.448516151241535,
0.59286090389016)), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))
structure(list(participant = c("001", "240", "242", "243", "244",
"245"), Vorname_Mutter_2_Buchstaben = c("ma", "el", "ur", "ka",
"ja", "la"), Vorname_Vater_2_Buchstaben = c("he", "ma", "re",
"jo", "fe", "ab"), PLZ_letzte_2_Ziffern = c(27L, 3L, 3L, 0L,
47L, 66L), date = structure(c(1588072799.367, 1586624239.667,
1586260007.882, 1586712365.514, 1586275669.545, 1586696526.84
), tzone = "UTC", class = c("POSIXct", "POSIXt")), RT_moving_variance = c(6258.46945397108,
5172.19983111429, 5032.90280000055, 5906.46678346693, 18694.9916770777,
7065.17254133398)), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))
structure(list(participant = c("1", "105", "107", "110", "111",
"116"), Vorname_Mutter_2_Buchstaben = c("ma", "an", "th", "ro",
"mo", "es"), Vorname_Vater_2_Buchstaben = c("he", "ce", "al",
"ha", "wa", "th"), PLZ_letzte_2_Ziffern = c("27", "0", "28",
"4", "23", "10"), date = structure(c(1588071580.734, 1587402995.471,
1587306792.774, 1586434189.309, 1586422686.217, 1587732745.487
), tzone = "UTC", class = c("POSIXct", "POSIXt")), on_task_mean = c(1,
1, 2, 2, 1, 1)), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))
structure(list(participant = c("270", "494", "261", "171", "177",
"323"), Vorname_Mutter_2_Buchstaben = c("se", "br", "ma", "do",
"ir", "li"), Vorname_Vater_2_Buchstaben = c("na", "th", "ar",
"sv", "re", "ur"), PLZ_letzte_2_Ziffern = c("02", "38", "67",
"03", "10", "07"), date = structure(c(1586187946.415, 1586212359.648,
1586251863.165, 1586255167.624, 1586255616.763, 1586258326.743
), tzone = "UTC", class = c("POSIXct", "POSIXt")), Alter = c(26,
27, 21, 28, 25, 22)), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))