Hope someone can send help for a desperate student :-) I have a set of procedure codes, each with a different number of surgeries (here: procedures) and their respective durations. I would like to get some descriptive statistics on the durations. For that, I would like my loop to detect and remove outliers using the IQR rule before the statistics are computed. This is the code without outlier detection and removal:
# Descriptive statistics of procedure durations, one row per procedure code.
#
# Objects that must already exist in the workspace:
#   a_g_procedures1                     data frame with the columns
#                                       ProcedureCode, TimeIn_1, TimeIn_2, TimeOut
#   list_of_procedurecodes              the procedure codes to summarise
#   number_of_different_procedurecodes  how many of those codes to process
#
# Result:
#   ag_output  data frame with the columns Procedure_codes, Number, Min_Times,
#              Max_Times, Average_Times, Median_Times and SD_Times.
#              Number counts all rows of a code; the statistics use only the
#              rows whose duration could be computed.

# Timestamps are stored as text in the form "dd/mm/YYYY HH:MM".
time_format <- "%d/%m/%Y %H:%M"

n_codes <- number_of_different_procedurecodes

# Variables for output - allocated once instead of being grown with c() on
# every pass. NA marks a code for which no duration could be computed.
Procedure_codes <- unlist(list_of_procedurecodes[seq_len(n_codes)])
Number <- integer(n_codes)
Min_Times <- rep(NA_real_, n_codes)
Max_Times <- rep(NA_real_, n_codes)
Average_Times <- rep(NA_real_, n_codes)
Median_Times <- rep(NA_real_, n_codes)
SD_Times <- rep(NA_real_, n_codes)

# Loop over all procedure codes.
for (Counter0 in seq_len(n_codes)) {
  Procedure_Name <- list_of_procedurecodes[Counter0]
  Procedure_name <- unlist(Procedure_Name)
  print(Procedure_Name)

  # Subset data for the specific procedure; which() drops rows whose code is NA.
  rows <- which(a_g_procedures1$ProcedureCode == Procedure_name)
  a_g_procedures2 <- a_g_procedures1[rows, , drop = FALSE]
  number_of_procedures <- nrow(a_g_procedures2)
  Number[Counter0] <- number_of_procedures

  # Parse whole columns at once instead of one row per loop iteration.
  TimeIn_1 <- as.POSIXct(a_g_procedures2$TimeIn_1, format = time_format)
  TimeIn_2 <- as.POSIXct(a_g_procedures2$TimeIn_2, format = time_format)
  TimeOut <- as.POSIXct(a_g_procedures2$TimeOut, format = time_format)

  # The procedure starts at the earlier of the two entry times. A timestamp
  # that cannot be parsed gives an NA duration for that row instead of
  # aborting the whole run, as the scalar if () comparison did.
  Start_Time <- pmin(TimeIn_1, TimeIn_2)
  Durations <- as.numeric(difftime(TimeOut, Start_Time, units = "mins"))

  # An outlier filter, if wanted, belongs here: it has to see the complete
  # vector of durations of this code, before any statistic is computed.

  # Drop missing durations once so that every statistic, the mean included,
  # is based on the same observations.
  valid <- Durations[!is.na(Durations)]
  if (length(valid) > 0) {
    Min_Times[Counter0] <- min(valid)
    Max_Times[Counter0] <- max(valid)
    # mean() has no `digits` argument; the old `digits = 1` was silently
    # ignored. Wrap the call in round(..., digits = 1) if one decimal is wanted.
    Average_Times[Counter0] <- mean(valid)
    Median_Times[Counter0] <- median(valid)
    SD_Times[Counter0] <- sd(valid) # NA when there is only one duration
  }
}

ag_output <- data.frame(
  Procedure_codes, Number, Min_Times, Max_Times,
  Average_Times, Median_Times, SD_Times
)
This is what I would have liked to add to the loop over a specific procedure:
# Remove outliers from a numeric vector using Tukey's IQR fences.
#
# x  numeric vector, e.g. all durations (in minutes) of one procedure code
# k  fence multiplier; 1.5 is the conventional choice
#
# Returns x without the values below Q1 - k * IQR or above Q3 + k * IQR.
# NA values are ignored when the fences are computed and are kept in the
# result, so only genuine outliers are dropped.
remove_iqr_outliers <- function(x, k = 1.5) {
  Q <- quantile(x, probs = c(0.25, 0.75), na.rm = TRUE, names = FALSE)
  iqr <- Q[2] - Q[1]
  low <- Q[1] - k * iqr
  up <- Q[2] + k * iqr # the upper fence is built from Q3, not from Q1
  keep <- is.na(x) | (x >= low & x <= up)
  # Index with the logical mask; setdiff() compares values and would also
  # collapse duplicated durations.
  x[keep]
}

# Usage: call it once per procedure code, after the loop over the single
# procedures has filled the complete vector (a single Duration has no
# quartiles), and before the statistics are computed:
#   Durations <- remove_iqr_outliers(Durations)
Does somebody have an idea how I could do this?
Thank you very much in advance!