What explains the difference in processing time when renaming columns in a pandas DataFrame? At first, I used the code df=df.rename(columns = {'colold':'colnew'}). This proved to be inefficient. I then modified the code to use df.rename(columns = {'colold':'colnew'}) (without reassigning the result), which reduced the processing time. While it seems obvious that the latter approach would take less time because it skips a step, why is this the case? In the following code, the difference between the two approaches is about 30 seconds.
import time
import numpy as np
import random
import pandas as pd
def fun_user_id(start, end, step):
    """Return evenly spaced values from ``start`` to ``end``, rounded to whole floats.

    The number of samples is ``(end - start) * int(1 / step) + 1``, so with
    ``step=0.5`` two samples are taken per unit and both endpoints are
    included.  Every sample is then rounded to zero decimals (ties go to the
    nearest even value), which means neighbouring samples can collapse to the
    same number.

    Args:
        start: Integer lower endpoint (inclusive).
        end: Integer upper endpoint (inclusive).
        step: Intended spacing between samples; only ``int(1 / step)`` is
            used, so the spacing is exact only when ``1 / step`` is a whole
            number.

    Returns:
        A list of Python floats.
    """
    num_points = (end - start) * int(1 / step) + 1
    # Round inside NumPy rather than element by element in Python: the result
    # is the same (both round half to even) but the loop runs in native code.
    return np.round(np.linspace(start, end, num_points), 0).tolist()
def fun_rand_num(count=20000000, low=300, high=800):
    """Return a list of ``count`` random integers drawn from ``[low, high]``.

    The defaults reproduce the original hard-coded behaviour (20,000,000
    values between 300 and 800, both inclusive).  Values come from
    ``random.randint`` one call per element, so seeding ``random`` makes the
    output reproducible.

    Args:
        count: Number of values to generate.
        low: Smallest value that may be drawn (inclusive).
        high: Largest value that may be drawn (inclusive).

    Returns:
        A list of ``count`` ints.
    """
    return [random.randint(low, high) for _ in range(count)]
if __name__ == '__main__':
    # Benchmark: ten single-column renames whose result is assigned back to
    # ``df`` versus ten single-column renames whose result is thrown away.

    # Column names in DataFrame order: var1..var10 followed by var1a..var10a.
    var_names = [f'var{i}{suffix}' for suffix in ('', 'a') for i in range(1, 11)]

    # Generate the data in the same order as the column list (userid first).
    data = {'userid': fun_user_id(1, 20000001, .5)}
    for name in var_names:
        data[name] = fun_rand_num()

    # NOTE: userid holds 40,000,001 values while every var column holds
    # 20,000,000; zip() stops at the shortest input, so the surplus userid
    # values are silently dropped.
    df = pd.DataFrame(list(zip(*data.values())), columns=list(data))

    # First half of each family is renamed with reassignment, second half
    # without.
    reassigned = [f'var{i}{suffix}' for suffix in ('', 'a') for i in range(1, 6)]
    discarded = [f'var{i}{suffix}' for suffix in ('', 'a') for i in range(6, 11)]

    # perf_counter() is the clock intended for measuring elapsed intervals.
    start1 = time.perf_counter()
    for old in reassigned:
        df = df.rename(columns={old: 'VAR' + old[3:]})
    end1 = time.perf_counter()
    print(f"Elapsed time rename df reassigned : {end1 - start1:.2f} seconds")

    start2 = time.perf_counter()
    for old in discarded:
        # rename() returns a new DataFrame; because the result is not bound to
        # anything, ``df`` keeps its old column names after this loop.
        df.rename(columns={old: 'VAR' + old[3:]})
    end2 = time.perf_counter()
    print(f"Elapsed time rename df not reassigned : {end2 - start2:.2f} seconds")
    print(f"Time Difference : {(end2 - start2)-(end1 - start1):.2f} seconds")