My question is about for loops and their alternatives:
I have several consecutive for loops that run over a large data frame. Each loop defines new variables/data frames that the following loop needs for its computation. Is there a way (maybe lapply or something similar) to restructure this so that the process runs faster?
# Example input: one work record per row, keyed by Class, Places, Names and
# d_stage. `Parent` optionally names another entry of `Names` (NA when there
# is none). Built directly with data.frame() so the object really is a data
# frame with six rows; no empty placeholder needs to be created first.
databackend2 <- data.frame(
  Class = c("T1", "T1", "T2", "T3", "T5", "Q12"),
  Places = c("Orlando", "Orlando", "Boston", "LA", "New York", "New York"),
  Names = c("Planist", "Plantist", "Engi", "Engi", "Shifter", "Automatist"),
  Final.Work = c(0.08, 0.05, 0.06, 0.05, 0.055, 0.043),
  Parent = c(NA, NA, NA, "Planist", "Engi", "Engi"),
  d_stage = c(1, 2, 2, 3, 5, 2),
  stringsAsFactors = FALSE
)
# Example input: one row per place/name with its stage columns (Cu.Stage,
# One.Target.Stage, Two.Target.Stage), Cu.No.Of.Emp and Sh.fact.
# Built directly with data.frame() so the object really is a data frame;
# the length-one `Type = NA` is recycled to all six rows.
databackend3 <- data.frame(
  Type = NA,
  Places = c("Orlando", "Colorado", "Boston", "LA", "New York", "Florida"),
  D.Stage = c(1.4, 1.5, 2.3, 3.4, 5.1, 2.5),
  X4 = c(3, 3, 4, 5, 5, 4),
  X5 = c(4, 5, 5, 6, 6, 6),
  Names = c("Sum", "Plantist", "Engi", "Fieldor", "Shifter", "Automatist"),
  Cu.No.Of.Emp = c(32, 7, 8, 9, 2, 6),
  Sh.fact = c(NA, 1, 1, 3, 3, 4),
  Cu.Stage = c(1, 1, 2, 3, 5, 2),
  One.Target.Stage = c(3, 3, 4, 5, 5, 4),
  Two.Target.Stage = c(4, 5, 5, 6, 6, 6),
  stringsAsFactors = FALSE
)
# Re-normalise `work_norm` for every class of `target_df` that has a parent.
#
# For each class whose `Parent` is not NA, `work_norm` is overwritten with
#   (Cu.No.Of.Emp[parent] / Sh.fact[parent]) *
#   (Final.Work / sum of the parent's Final.Work at stage `ds_cu`) /
#   (n_cu_norm_fact / sh_fact_names)
# Parent figures are looked up by name in `stage_df` and `work_df`.
#
# target_df       rows of the work table for one name / place / target stage
# work_df         the full work table (Names, d_stage, Final.Work)
# stage_df        the lookup table (Names, Cu.No.Of.Emp, Sh.fact)
# ds_cu           value of Cu.Stage for the place being processed
# n_cu_norm_fact  normalisation count of the current name
# sh_fact_names   Sh.fact of the current name
#
# Returns `target_df` with the updated `work_norm` column.
renormalise_by_parent <- function(target_df, work_df, stage_df, ds_cu,
                                  n_cu_norm_fact, sh_fact_names) {
  for (cls in target_df$Class) {
    # which() drops NA comparisons, so the row index is always usable
    rows <- which(target_df$Class == cls)
    source_names <- target_df$Parent[rows[1]]
    if (is.na(source_names)) next

    # One lookup for both parent figures. `[1]` yields NA when the parent has
    # no row in `stage_df`, so the result becomes NA instead of an error.
    # NOTE(review): only the first row is used if a name occurs several times.
    source_row <- stage_df %>% filter(Names == source_names)
    n_source_names <- source_row[["Cu.No.Of.Emp"]][1]
    if (!is.na(n_source_names) && n_source_names == 0) {
      n_source_names <- 1
    }
    sh_fact_source <- source_row[["Sh.fact"]][1]

    work_old <- target_df$Final.Work[rows]
    sum_of_cu_work_source <- work_df %>%
      filter(Names == source_names, d_stage == ds_cu) %>%
      pull(Final.Work) %>%
      sum()

    target_df[rows, "work_norm"] <-
      (n_source_names / sh_fact_source) *
      (work_old / sum_of_cu_work_source) /
      (n_cu_norm_fact / sh_fact_names)
  }
  target_df
}

# For every name: store its Cu.No.Of.Emp in `n_emp_df$n_cu`, compute the
# normalised work per place for both target stages, write the two debugging
# CSV files and store the scaled sums in `n_emp_df$n_target_one` /
# `n_emp_df$n_target_two`.
# `all_names` and `n_emp_df` are not defined in this part of the script and
# must already exist.
for (names in all_names) {
  # Single lookup of the current name. `[1]` yields NA when the name has no
  # row in databackend3; a missing count is then treated like NA / 0 below.
  name_row <- databackend3 %>% filter(Names == names)
  n_cu <- name_row[["Cu.No.Of.Emp"]][1]
  sh_fact_names <- name_row[["Sh.fact"]][1]
  n_cu_norm_fact <- n_cu
  if (is.na(n_cu) || n_cu == 0) {
    n_cu <- 0
    n_cu_norm_fact <- 1
  }
  n_emp_df[n_emp_df$names == names, "n_cu"] <- n_cu

  all_places_names <- databackend2 %>%
    filter(Names == names) %>%
    pull(Places) %>%
    unique() %>%
    as.character()

  # Sum of Final.Work at the current stage over all places of this name.
  # The value is forced to 1 when n_cu is 0, so nothing is computed then.
  # NOTE(review): this filter is not restricted to `Names == names` (the
  # original repeated `Places == place` twice) - confirm that is intended.
  if (n_cu == 0) {
    sum_of_cu_work_names <- 1
  } else {
    sum_of_cu_work_names <- 0
    for (place in all_places_names) {
      ds_cu <- databackend3 %>% filter(Places == place) %>% pull(Cu.Stage)
      work_at_place <- databackend2 %>%
        filter(Places == place, d_stage == ds_cu) %>%
        pull(Final.Work)
      sum_of_cu_work_names <- sum_of_cu_work_names + sum(work_at_place)
    }
  }

  # Collect the per-place results in lists and bind once after the loop
  # instead of growing a data frame on every iteration.
  per_place_one <- vector("list", length(all_places_names))
  per_place_two <- vector("list", length(all_places_names))

  for (i in seq_along(all_places_names)) {
    place <- all_places_names[[i]]

    # One lookup per place for all three stage columns
    place_row <- databackend3 %>% filter(Places == place)
    ds_cu <- place_row[["Cu.Stage"]]
    ds_target_one <- place_row[["One.Target.Stage"]]
    ds_target_two <- place_row[["Two.Target.Stage"]]

    names_place_df <- databackend2 %>% filter(Names == names, Places == place)

    # Work at the current stage, joined onto both target tables as Cu.Work
    df_names_place_cu <- names_place_df %>%
      filter(d_stage == ds_cu) %>%
      select(Class, Cu.Work = Final.Work)

    df_names_place_target_one <- names_place_df %>%
      filter(d_stage == ds_target_one) %>%
      left_join(df_names_place_cu, by = "Class")
    df_names_place_target_two <- names_place_df %>%
      filter(d_stage == ds_target_two) %>%
      left_join(df_names_place_cu, by = "Class")

    # Default normalisation; classes with a parent are overwritten below
    df_names_place_target_one$work_norm <-
      df_names_place_target_one$Final.Work / sum_of_cu_work_names
    df_names_place_target_two$work_norm <-
      df_names_place_target_two$Final.Work / sum_of_cu_work_names

    per_place_one[[i]] <- renormalise_by_parent(
      df_names_place_target_one, databackend2, databackend3,
      ds_cu, n_cu_norm_fact, sh_fact_names
    )
    per_place_two[[i]] <- renormalise_by_parent(
      df_names_place_target_two, databackend2, databackend3,
      ds_cu, n_cu_norm_fact, sh_fact_names
    )
  }

  df_names_one <- bind_rows(per_place_one)
  df_names_two <- bind_rows(per_place_two)

  # NOTE(review): the second file goes to "bl", not "two" - confirm the path.
  write.csv2(df_names_one, file = paste0("debugging\\one\\", names, ".csv"), row.names = FALSE)
  write.csv2(df_names_two, file = paste0("debugging\\bl\\", names, ".csv"), row.names = FALSE)

  # `[[` gives NULL (sum 0) when a name has no places and thus no work_norm
  sum_work_norm_one <- sum(df_names_one[["work_norm"]], na.rm = TRUE)
  sum_work_norm_two <- sum(df_names_two[["work_norm"]], na.rm = TRUE)

  # Scale by the count of the name; a count of 0 leaves the sum unscaled
  target_scale <- if (n_cu > 0) n_cu else 1
  n_emp_df[n_emp_df$names == names, "n_target_one"] <-
    sum_work_norm_one * target_scale
  n_emp_df[n_emp_df$names == names, "n_target_two"] <-
    sum_work_norm_two * target_scale
}