I am working with the following two datasets :
will_can
structure(list(will_can.REGION = c("AB", "B", "B", "B", "BB",
"BB", "BD", "BH", "BH", "BH", "BR", "BS", "BS", "BT", "BT", "CF",
"CF", "CM", "CO", "CV", "CV", "CV", "CW", "DA", "DA", "DD", "DE",
"DE", "DG", "DG", "DG", "DG", "DL", "DN", "DT", "E", "E", "E",
"EH", "EH", "EH", "EH", "EH", "EH", "EH", "EX", "EX", "EX", "FK",
"FK", "FY", "G", "G", "G", "GL", "GL", "HA", "HD", "HD", "IV",
"KA", "KA", "KA", "KA", "KA", "KA", "KA", "KA", "KA", "KA", "KA",
"KA", "KA", "KA", "KA", "KA", "KA", "KT", "KY", "KY", "KY", "L",
"L", "L", "LA", "LA", "LE", "LE", "M", "M", "ME", "ME", "MK",
"ML", "N", "N", "N", "NE", "NG", "NN", "NN", "NR", "NW", "OL",
"OX", "OX", "PH", "PO", "PR", "RG", "RH", "RM", "RM", "S", "S",
"S", "S", "SA", "SE", "SE", "SE", "SE", "SE", "SG", "SL", "SN",
"SN", "SO", "SO", "SO", "SS", "ST", "ST", "ST", "ST", "SW", "SW",
"SW", "SW", "SY", "SY", "SY", "TA", "TD", "TD", "TN", "TW", "UB",
"UB", "W", "W", "W", "W", "WA", "WC", "WD")), class = "data.frame", row.names = c(NA,
-156L))
will_can_region_norm
structure(list(norm = c(67.3112073766083, 0, 62.9924341677094,
0, 134.940019161483, 86.0271073135687, 233.710968710152, 0, 0,
136.210220315945, 72.0106074505199, 54.9624828839958, 0, 0, 46.5061888459603,
0, 51.9149234846709, 85.3970454501009, 0, 0, 141.438961332615,
122.50716299382, 197.887432921107, 96.646567080111, 108.996678489718,
873.779493880704, 0, 109.106806944561, 56.7421763178016, 249.99781251914,
0, 106.993398828272, 0, 182.997053590583, 0, 225.716259764203,
217.655353412983, 98.8344746903195, 70.3435951664196, 106.870878390986,
0, 0, 113.255439262354, 226.344150395729, 0, 0, 0, 0, 0, 0, 0,
0, 0, 92.5698187029358, 0, 1159.88543061088, 59.5746039659052,
0, 217.977759293264, 88.627745595238, 155.299651064979, 0, 70.3301130229532,
0, 0, 0, 0, 36.166169734453, 162.12380892704, 74.7710230881704,
112.29824076945, 120.249189991435, 25.6209421071498, 36.7120335621411,
115.238964414265, 0, 50.4621322067494, 59.9490876378327, 82.9160720202368,
132.342362545417, 0, 0, 209.987774511768, 0, 45.0104437732687,
59.5244437425851, 54.7420581590574, 77.921490980977, 132.545922191567,
100.083647410414, 51.5757713324224, 102.602449571922, 98.8984492920948,
0, 129.885834248271, 0, 189.332549749021, 149.846130500895, 0,
0, 73.4653456617979, 220.103517986062, 111.317004279081, 375.711503660056,
156.229153172374, 760.35739839154, 0, 83.1515916711375, 0, 0,
0, 73.5483180088058, 269.518568414391, 102.141462145838, 55.2886923953334,
151.949727736478, 148.297412239816, 0, 0, 0, 0, 0, 0, 0), REGION = c("AB",
"AL", "B", "BA", "BB", "BD", "BH", "BL", "BN", "BR", "BS", "BT",
"CA", "CB", "CF", "CH", "CM", "CO", "CR", "CT", "CV", "CW", "DA",
"DD", "DE", "DG", "DH", "DL", "DN", "DT", "DY", "E", "EC", "EH",
"EN", "EX", "FK", "FY", "G", "GL", "GU", "GY", "HA", "HD", "HG",
"HP", "HR", "HS", "HU", "HX", "IG", "IM", "IP", "IV", "JE", "KA",
"KT", "KW", "KY", "L", "LA", "LD", "LE", "LL", "LN", "LS", "LU",
"M", "ME", "MK", "ML", "N", "NE", "NG", "NN", "NP", "NR", "NW",
"OL", "OX", "PA", "PE", "PH", "PL", "PO", "PR", "RG", "RH", "RM",
"S", "SA", "SE", "SG", "SK", "SL", "SM", "SN", "SO", "SP", "SR",
"SS", "ST", "SW", "SY", "TA", "TD", "TF", "TN", "TQ", "TR", "TS",
"TW", "UB", "W", "WA", "WC", "WD", "WF", "WN", "WR", "WS", "WV",
"YO", "ZE")), row.names = c(NA, -124L), class = "data.frame")
I am trying to add a new column, will_can$norm
, which would contain values of will_can_region_norm$norm
based on matching values of the variable "REGION"
which is the same in both datasets. So gaps from the second dataset to the first would be automatically filled based on matching strings of REGION
Based on another question in the forum I tried the following function:
will_can2 <- merge(will_can, will_can_region_norm[,"norm"], by = "REGION", all=TRUE)
But I get the following error:
Error in fix.by(by.y, y) : 'by' must specify a unique correct column [translated from French]
Is there something I'm missing here? Would be grateful for some help !
Cameron