I am trying to execute a code that takes way too much time (>6 days). Maybe there is a way of making it more efficient. Any ideas?
library(haven)
library(plyr)
AFILIAD1 <- read_sav("XXXX")
#this sav has around 6 million rows.
AFILIAD1$F_ALTA<- as.character(AFILIAD1$F_ALTA)
AFILIAD1$F_BAJA<- as.character(AFILIAD1$F_BAJA)
AFILIAD1$F_ALTA <- as.Date(AFILIAD1$F_ALTA, "%Y%m%d")
AFILIAD1$F_BAJA <- as.Date(AFILIAD1$F_BAJA, "%Y%m%d")
#starting and ending date
meses <- seq(as.Date("1900-01-01"), as.Date("2014-12-31"), by = "month")
#this is the function that needs to be more efficient
ocupados <- function(pruebas){
previo <- c()
total <- c()
for( i in 1:length(meses)){
for( j in 1:nrow(pruebas)){
ifelse(pruebas$F_ALTA[j] <= meses[i] & pruebas$F_BAJA[j] >=
meses[i], previo[j]<- pruebas$IPF[j],previo[j]<- NA)
}
total[i] <- (length(unique(previo))-1)
}
names(total)<-meses
return(total)
}
#this takes >6 days to execute
afiliado1 <- ocupados(AFILIAD1)