I have a data file that's built the following way:
source_id, target_id, impressions, clicks
on which I add the following columns:
- pair - a tuple of the source and target
- CTR - basically clicks/impressions
- Lower Bound
- Upper Bound
The lower and upper bounds are calculated values (irrelevant to my question, but for the curious: they are the bounds of the Wilson confidence interval).
The thing is, I'm trying to sort the list by the lower bound (position = 6), descending. I tried several things (sort/sorted, using lambda vs. using itemgetter, creating a new list without the header and trying to sort just that), and still nothing appears to change. The code is below.
import csv
from math import sqrt
from operator import itemgetter
#----- Read CSV ----------------------------------------------------------------
# Load the raw CSV into raw_data: the header row is kept as strings and every
# other row is converted to a list of ints.
# Binary mode is kept as in the original (it is what the Python 2 csv module
# expects); the with-block guarantees the file is closed even if int() fails
# on a malformed cell.
# NOTE(review): 'rawdile' may be a typo for the intended input file name - confirm.
with open('rawdile', "rb") as raw_data_csv:
    raw_reader = csv.reader(raw_data_csv)
    raw_data = []
    for rownum, row in enumerate(raw_reader):
        if rownum == 0:  # Header
            raw_data.append(row)
        else:
            raw_data.append([int(x) for x in row])  # Transforming the values to ints

# Add a "pair" column holding (source_id, target_id) as a tuple.
# CTR is not added here; it is appended later together with the interval bounds.
raw_data[0].append("pair")
for row in raw_data[1:]:
    row.append((row[0], row[1]))  # tuple
# ------------------------------------------------------------------------------
# z-score used for the Wilson interval (two-sided, ~95% confidence).
z = 1.95996398454005


def confidence(n, clicks):
    """Return (CTR, lower bound, upper bound) of the Wilson score interval.

    n is the number of impressions and clicks the number of clicks.

    A 3-tuple is always returned so callers can unpack the result. With no
    impressions (n == 0) there is nothing to estimate and all three values
    are 0.0; previously a bare 0 was returned, which made unpacking fail.
    """
    if n == 0:
        return 0.0, 0.0, 0.0
    phat = float(clicks) / n
    # Terms shared by both bounds, kept in the same arithmetic order as the
    # original single-expression formulas.
    centre = phat + z * z / (2 * n)
    margin = z * sqrt((phat * (1 - phat) + z * z / (4 * n)) / n)
    denominator = 1 + z * z / n
    l_bound = (centre - margin) / denominator  # lower bound
    u_bound = (centre + margin) / denominator  # upper bound
    return phat, l_bound, u_bound
# Append CTR and the interval bounds to the header and to every data row.
raw_data[0].extend(["CTR", "Lower Bound", "Upper Bound"])
for row in raw_data[1:]:
    phat, l_bound, u_bound = confidence(row[2], row[3])
    row.extend([phat, l_bound, u_bound])

# Sort the data rows by lower bound (column 6), descending; the header stays
# at index 0. sorted() returns a NEW list and leaves its argument untouched,
# and raw_data[1:].sort() would only sort a temporary copy of the slice, so
# the sorted rows have to be assigned back into raw_data.
raw_data[1:] = sorted(raw_data[1:], key=itemgetter(6), reverse=True)

# Binary mode is kept as in the original (Python 2 csv convention); the
# with-block closes the output file even if writing fails.
with open('outputfile.csv', 'wb') as outputfile:
    wr = csv.writer(outputfile, quoting=csv.QUOTE_ALL)
    wr.writerows(raw_data)
raw_data_csv.close()
Can anybody tell why? Thanks!