I have the following data frame https://www.dropbox.com/s/c02qu7uobvrc8ku/college_Rda
This is a sample of the data: (copy+paste
'able)
educational_history <- structure(list(SCH_COLLEGE_STATUS_1997_09 = structure(c(1L, 1L,
1L, 1L, 5L, 1L, 1L, 5L, 5L, 5L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_1998_09 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_1999_09 = structure(c(3L,
1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_2000_09 = structure(c(3L,
3L, 1L, 1L, 1L, 3L, 1L, 3L, 3L, 1L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_2001_09 = structure(c(3L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 3L, 1L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_2002_09 = structure(c(3L,
3L, 2L, 1L, 1L, 1L, 1L, 3L, 3L, 3L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_2003_09 = structure(c(1L,
3L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_2004_09 = structure(c(1L,
3L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_2005_09 = structure(c(1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 3L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_2006_09 = structure(c(1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_2007_09 = structure(c(1L,
1L, 1L, 1L, 1L, 3L, 1L, 4L, 1L, 1L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_2008_09 = structure(c(1L,
1L, 1L, 1L, 1L, 3L, 1L, 4L, 1L, 1L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_2009_09 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_2010_09 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 5L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), SCH_COLLEGE_STATUS_2011_09 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L), .Label = c("Not enrolled in college",
"Enrolled in 2-year college", "Enrolled in 4-year college", "Enrolled in Graduate program",
"VALID SKIP", "NON-INTERVIEW"), class = "factor"), PUBID = c(1,
2, 3, 4, 5, 6, 7, 8, 9, 10)), .Names = c("SCH_COLLEGE_STATUS_1997_09",
"SCH_COLLEGE_STATUS_1998_09", "SCH_COLLEGE_STATUS_1999_09", "SCH_COLLEGE_STATUS_2000_09",
"SCH_COLLEGE_STATUS_2001_09", "SCH_COLLEGE_STATUS_2002_09", "SCH_COLLEGE_STATUS_2003_09",
"SCH_COLLEGE_STATUS_2004_09", "SCH_COLLEGE_STATUS_2005_09", "SCH_COLLEGE_STATUS_2006_09",
"SCH_COLLEGE_STATUS_2007_09", "SCH_COLLEGE_STATUS_2008_09", "SCH_COLLEGE_STATUS_2009_09",
"SCH_COLLEGE_STATUS_2010_09", "SCH_COLLEGE_STATUS_2011_09", "PUBID"
), row.names = c(NA, 10L), class = "data.frame")
I want to generate a new data frame using that data.
I only need two fields: PUBID and First year enrolled in a 4 year college. The information about the year is inside the name of the column. I tried:
FirstYear4C <- function(ID) {
ndX=which(educational_history$PUBID==ID)
educational_historyNdX=educational_history[ndX,]
year=NA
if (educational_historyNdX$SCH_COLLEGE_STATUS_1997_09=="Enrolled in 4-year college"){
year=1997
return(year)
}
if (educational_historyNdX$SCH_COLLEGE_STATUS_1998_09=="Enrolled in 4-year college"){
year=1998
return(year)
}
if (educational_historyNdX$SCH_COLLEGE_STATUS_1999_09=="Enrolled in 4-year college"){
year=1999
return(year)
}
if (educational_historyNdX$SCH_COLLEGE_STATUS_2000_09=="Enrolled in 4-year college"){
year=2000
return(year)
}
return(NA)
}
FirstYear<-unlist(lapply(X=educational_history$PUBID,FirstYear4C))
FourYearCollege<-data.frame(PUBID=educational_history$PUBID,
FirstYear=FirstYear)
I'm sure there is a better way of coding that function. Having to copy and paste column by column seems very inefficient.
PUBID 1stYear4YC
1 1999
2 2000
...
6 2000