I have a dataset that I want to convert from long to wide format. I have checked the two most popular answers (here and here), but I can't make sense of them as they apply to my data structure.
I want to retain the structure of the dataset but tranform the data in the Code
and Rating
columns and send them to their own columns to the right of the dataset. The labels in the Code
column should become their own columns (ignoring blanks), and the data in each column should be poplulated with the values (including NA
s) from the Rating
column. The structure should look like this:
"ID_Study"
"ID_Person"
"ID_Project"
"Category"
"Orig"
"Cur"
"Nonc"
"Logi"
"Impa"
"OTHB"
"T&E"
"IMP"
"VCN"
"ABS"
"OTV"
Here is the code that I have been trying, using reshape2
:
library(reshape2)
dfmolten <- melt(df, id=c("ID_Study", "ID_Person", "ID_Project", "Category", "Orig", "Cur", "Nonc", "Logi", "Impa"))
dfcast <- cast(dfmolten, ID_Project~variable, fun.aggregate=sum)
Here is my dataset:
df <- structure(list(ID_Study = c("dcb5e954-59a1-4bd0-bffc-c68ffbb320c6",
"dcb5e954-59a1-4bd0-bffc-c68ffbb320c6", "dcb5e954-59a1-4bd0-bffc-c68ffbb320c6",
"dcb5e954-59a1-4bd0-bffc-c68ffbb320c6", "dcb5e954-59a1-4bd0-bffc-c68ffbb320c6",
"dcb5e954-59a1-4bd0-bffc-c68ffbb320c6", "dcb5e954-59a1-4bd0-bffc-c68ffbb320c6",
"dcb5e954-59a1-4bd0-bffc-c68ffbb320c6", "dcb5e954-59a1-4bd0-bffc-c68ffbb320c6",
"dcb5e954-59a1-4bd0-bffc-c68ffbb320c6", "dcb5e954-59a1-4bd0-bffc-c68ffbb320c6",
"dcb5e954-59a1-4bd0-bffc-c68ffbb320c6", "dcb5e954-59a1-4bd0-bffc-c68ffbb320c6",
"dcb5e954-59a1-4bd0-bffc-c68ffbb320c6", "dcb5e954-59a1-4bd0-bffc-c68ffbb320c6",
"dcb5e954-59a1-4bd0-bffc-c68ffbb320c6", "dcb5e954-59a1-4bd0-bffc-c68ffbb320c6",
"dcb5e954-59a1-4bd0-bffc-c68ffbb320c6", "dcb5e954-59a1-4bd0-bffc-c68ffbb320c6",
"dcb5e954-59a1-4bd0-bffc-c68ffbb320c6"), ID_Person = c("fe7abc94-bc8c-457b-acac-ab60b82d004b",
"b35a7b92-9f68-496a-ad09-ce48d17b9435", "68869adf-70d4-4de6-9661-fb842b554c93",
"7e6629c5-6deb-4760-aa04-95c391bfb364", "2c16dbd1-70ab-45bf-ad8d-e4b45d2af2e7",
"fe7abc94-bc8c-457b-acac-ab60b82d004b", "7aa38d5c-b574-4d98-aa06-82a8d1b6f7e3",
"d3e956b4-0dba-4cb7-be01-159320ca5950", "b35a7b92-9f68-496a-ad09-ce48d17b9435",
"d9dadd24-e561-49c9-8611-e1382af622c6", "c92a3e2e-1ff4-4cc5-8482-aa9ff6c521bf",
"eb7193ff-22b1-4626-8cc4-8dca70b69916", "8ccbc42f-8d87-4580-8d60-6d21c0a5ecc7",
"e8ff9aae-6a37-49f0-aa4b-300c9d40078e", "7e6629c5-6deb-4760-aa04-95c391bfb364",
"e8ff9aae-6a37-49f0-aa4b-300c9d40078e", "808f65f6-be9a-42de-9f2f-000ce9af99d9",
"b0139033-f017-4e86-b41f-3785a9f4c0d7", "47c6a829-93ab-48f2-8a53-53c93778cc56",
"fcd5e4f5-2803-4e84-8e9c-872504b6e261"), ID_Project = c("877ca787-5b4b-4c7a-9fb0-4f829c743bea",
"88f2028b-2720-4c29-9382-4b0991cf14a5", "7d8567bc-f51a-49b4-be9d-f9badcf70c03",
"13a1152b-4ce5-47f9-ae08-8c3ae3758290", "b0fef202-6657-4f69-9b4f-5026a30a8585",
"acd3b520-fa67-4832-aece-e7cf6f691ab4", "bcb6e7ae-f179-4986-ba8a-161d7effd1dc",
"1d5234a0-028a-4a71-9601-45d89f065662", "8cbcfb4c-c8dd-4a49-be04-fffc631edb19",
"2992f736-2423-4fc3-ba16-1b2007fdc384", "09453ff6-d717-45a0-b2b3-50b83c6e9c3e",
"82cce490-435b-4c0d-9c02-bed7514f8064", "b16864ce-0954-4a28-83cd-37a7c5924072",
"6d5a00dc-2928-4595-bcfd-e5497c4b4e6c", "e38e4062-4886-4800-8ae7-25258b30baa9",
"b09990e4-b86e-4b65-b8ca-4246bdcd9bb1", "97f1e263-cbad-42a7-a9be-492efd4cc653",
"10c814ba-aa91-498c-91f2-8fb9ccc3f105", "e387d312-00de-453a-a45c-529e72bf6b7c",
"4fca25ec-3b9c-488f-8edf-0d89b5aa1bb5"), Category = c("", "",
"", "", "", "Work", "", "", "", "", "", "", "", "Work", "", "Academic",
"", "", "Financial / Administrative", ""), Code = c("", "OTHB",
"", "T&E", "", "IMP", "", "", "VCN", "", "", "", "", "ABS", "OTV",
"VCN", "", "", "", ""), Rating = c(NA, 3L, NA, 1L, NA, 8L, NA,
NA, 2L, NA, NA, NA, NA, 7L, 9L, 2L, NA, NA, NA, NA), Orig = c(5.17,
5.33, 5.5, 5.67, 5.5, 5.17, 6.33, 4.67, 5.33, 6.17, 6.33, 5,
5.67, 6, 5.67, 6, 6.33, 5.83, 5.83, 3.67), Cur = c(5, 5.83, 6.33,
5, 5.67, 5, 6.67, 5, 5.83, 6.17, 6, 5.5, 5.67, 5.33, 5, 5.33,
5.33, 6, 6.5, 4.17), Nonc = c(2, 4.33, 4, 2.5, 2.17, 2, 4, 3.83,
4.33, 3, 5.83, 3.33, 4.17, 3.67, 2.5, 3.67, 3.67, 4.83, 3.83,
3.67), Logi = c(5.17, 5.17, 4.33, 4.17, 5.67, 5.17, 4.5, 4.67,
5.17, 5.67, 4.5, 5.67, 3.67, 4, 4.17, 4, 5.17, 4.83, 4, 5.5),
Impa = c(5.5, 4.83, 5.67, 4.5, 6, 5.5, 4.5, 4.33, 4.83, 5.33,
6.5, 5.67, 3.5, 4, 4.5, 4, 6, 5.17, 5.17, 5.5)), class = "data.frame", row.names = c(NA,
-20L))