Considering that your Input_file is:
cat Input_file
Done City Start_time End_time
Yes Chicago 10:16:51,14-Dec-2018 10:19:38,14-Dec-2018
Yes Atlanta 10:12:58,14-Dec-2018 10:20:58,14-Dec-2018
No Minnetonka 10:16:38,14-Dec-2018 10:21:50,14-Dec-2018
Yes Hopkins 10:22:20,14-Dec-2018 10:18:11,14-Dec-2018
Which rules does the script follow:
This code should take care of which DATE COLUMN is having greater value than other it will take the difference in that manner. Eg. last column's time is greater than 2nd last column then it will do last_col_time-second_last_col_time
else it will do vice versa.
I have changed 14-Dec-2018
month to all smaller letter so even they are in any form (small, capital, or mix) we should be good at it.
I have NOT hardcoded 3rd and 4th column values in code, since 2nd column (which is city) may have spaces in between city names, so rather I have taken column values from last fields like $(NF-1)
(second last column) and $NF
(last column value).
This is all done as:
awk '
BEGIN{
num=split("jan,feb,mar,apr,may,jun,jul,aug,sep,oct,nov,dec",month,",")
for(i=1;i<=12;i++){
a[month[i]]=i
}
}
FNR==1{
print $0,"Time_diff"
next
}
{
split($(NF-1),array,"[:,-]")
split($(NF),array1,"[:,-]")
val=mktime(array[6]" "a[tolower(array[5])]" "array[4]" "array[1]" "array[2]" "array[3])
val1=mktime(array1[6]" "a[tolower(array1[5])]" "array1[4]" "array1[1]" "array1[2]" "array1[3])
delta=val>=val1?val-val1:val1-val
hrs = int(delta/3600)
min = int((delta - hrs*3600)/60)
sec = delta - (hrs*3600 + min*60)
printf "%s\t%02d:%02d:%02d\n", $0, hrs, min, sec
hrs=min=sec=delta=""
}
' Input_file | column -t
Output will be as follows.
Done City Start_time End_time Time_diff
Yes Chicago 10:16:51,14-Dec-2018 10:19:38,14-Dec-2018 00:02:47
Yes Atlanta 10:12:58,14-Dec-2018 10:20:58,14-Dec-2018 00:08:00
No Minnetonka 10:16:38,14-Dec-2018 10:21:50,14-Dec-2018 00:05:12
Yes Hopkins 10:22:20,14-Dec-2018 10:18:11,14-Dec-2018 00:04:09
Explanation of above code: Sorry, we need to scroll on the right side here.
awk ' ##Starting awk code here.
BEGIN{ ##Mentioning BEGIN section of awk here.
num=split("jan,feb,mar,apr,may,jun,jul,aug,sep,oct,nov,dec",month,",") ##Creating month array which have months value in it.
for(i=1;i<=12;i++){ ##Starting a for loop for covering 12 months.
a[month[i]]=i ##Creating an array a whose index is month value and value is i.
} ##Closing for loop block here.
} ##Closing BEGIN section block here.
FNR==1{ ##Checking if this is first line.
print $0,"Time_diff" ##Printing current line with string Time_diff here.
next ##next will skip all further statements from here.
} ##Closing FNR conditoin block here.
{
split($(NF-1),array,"[:,-]") ##Splitting 2nd last column to array named array.
split($(NF),array1,"[:,-]") ##Splitting last column to array with delimietr as : or , or -
val=mktime(array[6]" "a[tolower(array[5])]" "array[4]" "array[1]" "array[2]" "array[3]) ##Creating val which have mktime value,passing array elements.
val1=mktime(array1[6]" "a[tolower(array1[5])]" "array1[4]" "array1[1]" "array1[2]" "array1[3]) ##Creating val1 variable by mktime passing array1 elements.
delta=val>=val1?val-val1:val1-val ##getting diff of val and val1 depending upon highest-lowest value
hrs = int(delta/3600) ##getting diff in hours if any.
min = int((delta - hrs*3600)/60) ##getting diff in min if any.
sec = delta - (hrs*3600 + min*60) ##getting diff in seconds value.
printf "%s\t%02d:%02d:%02d\n", $0, hrs, min, sec ##Printing line and value of hrs,min and sec values here.
hrs=min=sec=delta="" ##Nullifying variables values here.
}
' Input_file | column -t ##Mentioning Input_file and passing it to column command for TAB format in output.