I am trying to find out the connecting transaction. From the first TRUE to last TRUE, its considered as one transaction and also find out in the transaction, the tpt_mode whether is mixed or pure. Then, insert a new column with new data but currently now the for loop is working with little volume of data, when it comes to huge volume of data, it tends to run very slow. How can I optimize the for loop to speed up the performance?
firstid<-1
currTpt <- 'NA'
count<-0
n <- nrow(tnx)
for (i in 1:n) {
if(tnx$FIRST[i]){
firstid<-i
currTpt <-tnx$mode[i]
count <-1
}
else{
count <- count + 1
}
if(as.character(tnx$mode[i])!= as.character(currTpt)){
currTpt <- 'both'
}
if(tnx$LAST[i])
{
tnx$final_end_loc[firstid]<-tnx$end_loc[i]
tnx$final_end_date[firstid]<-as.character(tnx$end_date[i])
tnx$final_end_time[firstid]<-as.character(tnx$end_time[i])
tnx$final_mode[firstid]<-as.character(currTpt)
tnx$final_count[firstid] <- count
}
}
final_tnx<-subset(tnx,FIRST==TRUE,c("id","start_date","start_time","final_end_date","final_end_time","start_loc","final_end_loc","final_mode","final_count"))
Sample data: EDIT
tnx<- data.frame(
id=c("A","A","A","A","C","C","D","D","E"),
mode=c("on","on","off","on","on","off","off","off","on"),
start_time=c("8:20:22","17:20:22","17:45:22","18:20:22","16:35:22","17:20:22","15:20:22","16:00:22","12:20:22"),
end_time=c("8:45:22","17:30:22","18:00:22","18:30:22","17:00:22","17:50:22","15:45:22","16:14:22","27:50:22"),
start_loc=c("12","12","207","12","11","65","222","32","12"),
end_loc=c(31,31,29,11,22,12,45,31,11),
start_date=c("6/3/2012","6/3/2012","6/3/2012","6/3/2012","6/3/2012","6/3/2012","6/3/2012","6/3/2012","6/3/2012"),
end_date=c("6/3/2012","6/3/2012","6/3/2012","6/3/2012","6/3/2012","6/3/2012","6/3/2012","6/3/2012","6/3/2012"),
FIRST=c(T,T,F,F,T,F,T,F,T),
LAST=c(T,F,F,T,F,T,F,T,T)
)
Sample dataset in picture form:
Expected results:
Thanks in advance.