0

My goal is to create a new column to identify the semester enrolled in college. For example, in the attached dataset, 2016-09-01 starts semester 1, 2017-01-09 starts semester 2, and so on. The new column has to accept a date range because enrollment dates differ depending on the college where students enroll.

I started with this to try to get it to work. Then I would move to a function, but this doesn't work.

mutate(
semester = case_when(
  enroll_begin <= ymd(paste0(year(ymd(
    enroll_begin
    )) + 1, "0101")) & #2017-01-01
  enroll_begin > ymd(paste0(year(ymd(
    enroll_begin
    )), "0430")) ~ "first semester",
  #2016-04-30
  enroll_begin <= ymd(paste0(year(ymd(
    enroll_begin
    )) + 1, "0630")) & #2017-06-30
  enroll_begin > ymd(paste0(year(ymd(
    enroll_begin
    )) + 1, "0101")) ~ "second semester",
  #2017-01-01
  enroll_begin <= ymd(paste0(year(ymd(
    enroll_begin
    )) + 2, "0101")) & #2018-01-01
  enroll_begin > ymd(paste0(year(ymd(
    enroll_begin
    )) + 1, "0730")) ~ "third semester",
  #2017-07-30
  enroll_begin <= ymd(paste0(year(ymd(
    enroll_begin
    )) + 2, "0630")) & #2018-06-30
  enroll_begin > ymd(paste0(year(ymd(
    enroll_begin
    )) + 2, "0101")) ~ "fourth semester",
  #2018-01-01
  enroll_begin <= ymd(paste0(year(ymd(
    enroll_begin
    )) + 3, "0101")) & #2019-01-01
  enroll_begin > ymd(paste0(year(ymd(
    enroll_begin
    )) + 2, "0730")) ~ "fifth semester",
  #2018-07-30
  enroll_begin <= ymd(paste0(year(ymd(
    enroll_begin
  )) + 3, "0730")) & #2019-07-30
  enroll_begin > ymd(paste0(year(ymd(
    enroll_begin
    )) + 3, "0101")) ~ "sixth semester",
  #2019-01-01
  enroll_begin <= ymd(paste0(year(ymd(
    enroll_begin
    )) + 4, "0101")) & #2020-01-01
  enroll_begin > ymd(paste0(year(ymd(
    enroll_begin
    )) + 3, "0730")) ~ "seventh semester",
  #2019-07-30
  enroll_begin <= ymd(paste0(year(ymd(
    enroll_begin
    )) + 4, "0630")) & #2020-06-30
  enroll_begin > ymd(paste0(year(ymd(
    enroll_begin
    )) + 4, "0101")) ~ "eighth semester",
  #2020-01-01
  enroll_begin <= ymd(paste0(year(ymd(
    enroll_begin
    )) + 5, "0101")) & #2021-01-01
  enroll_begin > ymd(paste0(year(ymd(
    enroll_begin
    )) + 4, "0630")) ~ "ninth semester",
  #2020-06-30
  enroll_begin <= ymd(paste0(year(ymd(
    enroll_begin
    )) + 5, "0630")) & #2021-06-30
  enroll_begin > ymd(paste0(year(ymd(
    enroll_begin
    )) + 5, "0101")) ~ "tenth semester",
  #2021-01-01
))

but I'm getting this...

How do I label each subsequent record as a new term/semester after the first? Like this...

enter image description here

structure(list(sid = c("1000031_", "1000031_", "1000031_", "1000031_", 
"1000031_", "1000031_", "1000031_", "1000031_", "1000031_", "1000031_", 
"1000031_", "1000031_", "1000031_", "1000031_", "1000031_", "1000031_", 
"1000031_", "1000031_"), local_id = c("684714434_", "684714434_", 
"684714434_", "684714434_", "684714434_", "684714434_", "684714434_", 
"684714434_", "684714434_", "684714434_", "684714434_", "684714434_", 
"684714434_", "684714434_", "684714434_", "684714434_", "684714434_", 
"684714434_"), chrt_grad = c(2016, 2016, 2016, 2016, 2016, 2016, 
2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 
2016), college_enrolled = c("Y", "Y", "Y", "Y", "Y", "Y", "Y", 
"Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"), college_name = c("AZUSA PACIFIC UNIVERSITY- UNDERGRADUATE", 
"AZUSA PACIFIC UNIVERSITY- UNDERGRADUATE", "AZUSA PACIFIC UNIVERSITY- UNDERGRADUATE", 
"AZUSA PACIFIC UNIVERSITY- UNDERGRADUATE", "AZUSA PACIFIC UNIVERSITY- UNDERGRADUATE", 
"AZUSA PACIFIC UNIVERSITY- UNDERGRADUATE", "AZUSA PACIFIC UNIVERSITY- UNDERGRADUATE", 
"AZUSA PACIFIC UNIVERSITY- UNDERGRADUATE", "AZUSA PACIFIC UNIVERSITY- UNDERGRADUATE", 
"AZUSA PACIFIC UNIVERSITY- UNDERGRADUATE", "AZUSA PACIFIC UNIVERSITY- UNDERGRADUATE", 
"AZUSA PACIFIC UNIVERSITY- UNDERGRADUATE", "AZUSA PACIFIC UNIVERSITY- UNDERGRADUATE", 
"AZUSA PACIFIC UNIVERSITY- UNDERGRADUATE", "AZUSA PACIFIC UNIVERSITY- UNDERGRADUATE", 
"AZUSA PACIFIC UNIVERSITY- UNDERGRADUATE", "AZUSA PACIFIC UNIVERSITY- UNDERGRADUATE", 
"AZUSA PACIFIC UNIVERSITY- UNDERGRADUATE"), enroll_begin = structure(c(17045, 
17175, 17294, 17406, 17539, 17665, 17770, 17903, 18029, 18029, 
18134, 18267, 18386, 18386, 18505, 18638, 18757, 18869), class = "Date"), 
graduated = c("N", "N", "N", "N", "N", "N", "N", "N", "N", 
"N", "N", "N", "N", "N", "N", "N", "N", "N"), semester = c("first semester", 
NA, "first semester", "first semester", NA, "first semester", 
"first semester", NA, "first semester", "first semester", 
"first semester", NA, "first semester", "first semester", 
"first semester", NA, "first semester", "first semester")), row.names = c(NA, 
-18L), class = c("tbl_df", "tbl", "data.frame"))
Shannon
  • 125
  • 7
  • 3
    Please share your data using `dput`. You need to provide a [minimal, reproducible example](https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example) of your data. It'd be helpful to share your desired output as well since your code does not read well. – M-- Mar 15 '23 at 14:48
  • Thanks for the edit. Sharing your desired output would be really helpful to get an answer. – M-- Mar 15 '23 at 16:20
  • 1
    Can you give an example or two (especially if there are any weird edge cases) of the logic you want to follow? The current logic is not clear to me, as the first case in `case_when` seems to compare `enroll_begin` to January 1 of the next year, which will always be TRUE, making all the other cases redundant. Presumably you need to bring other variables into it, because Sept 2016 might be one student's first semester and another student's fifth semester, and that's not even entertaining the possibility of students starting mid-year. – Jon Spring Mar 15 '23 at 16:22

0 Answers0