I have two data files, data frames 'cap' and 'id'. Data frame 'cap' containes information on certain individuals for various seasons, the name of the individual being in the first column, the next 11 columns being observational info for 11 consecutive years. The second dataframe, 'id', contains three columns, i.e. the individual names, their sex, and one index for this individual. I need to combine the information in these two files so in order to end up with one file with name, sex, index, and then the 11 columns with seasonal information:
NAME SEX INDEX SEASON01 SEASON02 SEASON03...
The problem is that these files are of different lengths, and the names are even sorted differently, so the script need to check that the name in the first column is the correct one before merging the information for each record.
Any help appreciated.
Sample 'cap':
structure(list(name = c("K1", "K2", "KI01", "KI03", "KI05", "KI07",
"KI08", "NKW-001", "NKW-002", "NKW-003", "NKW-004", "NKW-006",
"NKW-007", "NKW-008", "NKW-009", "NKW-010", "NKW-011", "NKW-013",
"NKW-014", "NKW-015", "NKW-016", "NKW-017", "NKW-018", "NKW-019",
"NKW-021", "NKW-022", "NKW-023", "NKW-024", "NKW-025", "NKW-026",
"NKW-028", "NKW-030", "NKW-031", "NKW-032", "NKW-033", "NKW-034",
"NKW-035", "NKW-036", "NKW-037", "NKW-038", "NKW-039", "NKW-040",
"NKW-041", "NKW-042", "NKW-043", "NKW-044", "NKW-045", "NKW-046",
"NKW-047", "NKW-048", "NKW-049", "NKW-050", "NKW-051", "NKW-052",
"NKW-053", "NKW-054", "NKW-055", "NKW-056", "NKW-058", "NKW-059",
"NKW-060", "NKW-061", "NKW-062", "NKW-063", "NKW-064", "NKW-065",
"NKW-068", "NKW-069", "NKW-070", "NKW-071", "NKW-072", "NKW-073",
"NKW-074", "NKW-076", "NKW-077", "NKW-078", "NKW-079", "NKW-080",
"NKW-081", "NKW-082", "NKW-083", "NKW-084", "NKW-085", "NKW-086",
"NKW-087", "NKW-088", "NKW-089", "NKW-090"), season01 = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0), season02 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), season03 = c(0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
), season04 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), season05 = c(0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), season06 = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0), season07 = c(0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1,
0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0), season08 = c(1, 0, 0, 0,
0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1,
1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1
), season09 = c(0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0,
1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1,
0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0,
1, 0, 0, 0, 0, 0, 0, 1, 0, 0), season10 = c(0, 0, 1, 1, 0, 1,
1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0,
1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0), season11 = c(0,
0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0,
0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
1, 0, 0)), class = "data.frame", row.names = c(NA, -88L))
Sample 'id':
structure(list(name = c("K1", "K2", "K3", "K4", "KI01", "KI02",
"KI03", "KI05", "KI06", "KI07", "KI08", "KI10", "NKW-0001", "NKW-0002",
"NKW-0003", "NKW-0004", "NKW-0005", "NKW-0006", "NKW-0007", "NKW-0008",
"NKW-0009", "NKW-0010", "NKW-0011", "NKW-0012", "NKW-0013", "NKW-0014",
"NKW-0015", "NKW-0016", "NKW-0017", "NKW-0018", "NKW-0019", "NKW-0020",
"NKW-0021", "NKW-0022", "NKW-0023", "NKW-0024", "NKW-0025", "NKW-0026",
"NKW-0027", "NKW-0028", "NKW-0029", "NKW-0030", "NKW-0031", "NKW-0032",
"NKW-0033", "NKW-0034", "NKW-0035", "NKW-0036", "NKW-0037", "NKW-0038",
"NKW-0039", "NKW-0040", "NKW-0041", "NKW-0042", "NKW-0043", "NKW-0044",
"NKW-0045", "NKW-0046", "NKW-0047", "NKW-0048", "NKW-0049", "NKW-0050",
"NKW-0051", "NKW-0052", "NKW-0053", "NKW-0054", "NKW-0055", "NKW-0056",
"NKW-0057", "NKW-0058", "NKW-0059", "NKW-0060", "NKW-0061", "NKW-0062",
"NKW-0063", "NKW-0064", "NKW-0065", "NKW-0066", "NKW-0067", "NKW-0068",
"NKW-0069", "NKW-0070", "NKW-0071", "NKW-0072", "NKW-0073", "NKW-0074",
"NKW-0075", "NKW-0076", "NKW-0077", "NKW-0078", "NKW-0079", "NKW-0080",
"NKW-0081", "NKW-0082", "NKW-0083", "NKW-0084", "NKW-0085", "NKW-0086",
"NKW-0087", "NKW-0088", "NKW-0089", "NKW-0090"), sex = c("F",
"M", "M", "U", "M", "F", "F", "M", "F", "M", "M", "F", "M", "M",
"M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M",
"M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "F", "M",
"M", "M", "M", "M", "M", "M", "M", "M", "M", "U", "M", "U", "M",
"M", "M", "U", "M", "M", "M", "F", "M", "F", "M", "F", "M", "M",
"M", "M", "U", "U", "M", "M", "M", "M", "M", "M", "M", "U", "M",
"M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M",
"M", "M", "M", "M", "U", "M", "M", "M", "M", "M"), index = c(1,
1, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 3, 3, 2, 2, 3, 3, 3, 2, 3,
2, 2, 3, 2, 3, 3, 3, 2, 2, 2, 3, 2, 3, 2, 2, 2, 2, 3, 2, 2, 3,
2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 3, 2,
3, 2, 3, 2, 3, 2, 3, 3, 3, 3, 3, 3, 3, 2, 3, 2, 2, 2, 3, 2, 3,
3, 1, 2, 2, 2, 2, 3, 2, 3, 2, 2, 2, 2, 0, 3, 2, 2)), class = "data.frame", row.names = c(NA,
-102L))