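Since user is going to be the key, let's build a dictionary keyed by user. Note: this will ultimately load the entire file's contents into memory once, but it does not require the file to be sorted by user first.
Also note that the output is not sorted either: before Python 3.7, dict.items() does not return items in any guaranteed order, and from Python 3.7 onward it returns them in insertion order
(the order in which each user first appears in the file), which is still not sorted order.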
def group_meetings(lines):
    """Group meeting ids by user.

    Each element of *lines* is expected to hold two whitespace-separated
    fields: a user and a meeting. Blank lines and header rows (first field
    equal to 'user') are skipped. A line with any other number of fields
    raises ValueError from the unpacking.

    Returns a dict mapping each user to the list of that user's meetings,
    in the order they were encountered.
    """
    output = {}
    for line in lines:
        # split() with no argument also discards the trailing '\r\n'
        fields = line.split()
        if not fields:
            continue  # blank line: nothing to record
        user, meeting = fields
        if user == 'user':
            continue  # header row, not data
        # create the list on first sight of the user, then append
        output.setdefault(user, []).append(meeting)
    return output


def format_row(user, meetings):
    """Format one output row, e.g. ("a", ["1", "2"]) -> 'a "[1,2]"'."""
    return '{} "[{}]"'.format(user, ','.join(meetings))


if __name__ == '__main__':
    with open('input.csv') as f:
        output = group_meetings(f)
    # print output header
    print("user meetings")  # I used a single space, feel free to use '\t' etc.
    # let's retrieve all meetings per user
    for user, meetings in output.items():  # in python2, use .iteritems() instead
        print(format_row(user, meetings))
Fancier: sort the output. I do this by sorting the keys first. Note that this will use even more memory, since sorting creates a separate list of the keys as well.
# same grouping as before, but the rows are printed in sorted key order
def group_meetings(lines):
    """Group meeting ids by user.

    Each element of *lines* is expected to hold two whitespace-separated
    fields: a user and a meeting. Blank lines and header rows (first field
    equal to 'user') are skipped. A line with any other number of fields
    raises ValueError from the unpacking.

    Returns a dict mapping each user to the list of that user's meetings,
    in the order they were encountered.
    """
    output = {}
    for line in lines:
        # split() with no argument also discards the trailing '\r\n'
        fields = line.split()
        if not fields:
            continue  # blank line: nothing to record
        user, meeting = fields
        if user == 'user':
            continue  # header row, not data
        # create the list on first sight of the user, then append
        output.setdefault(user, []).append(meeting)
    return output


def sorted_rows(output):
    """Return the formatted rows of *output*, ordered by user key.

    Keys are compared as strings, so the order is lexicographic
    (e.g. '10' sorts before '9').
    """
    # sorted() builds a new list of the keys; the dict itself is untouched
    return ['{} "[{}]"'.format(user, ','.join(output[user]))
            for user in sorted(output)]


if __name__ == '__main__':
    with open('input.csv') as f:
        output = group_meetings(f)
    # print output header
    print("user meetings")  # I used a single space, feel free to use '\t' etc.
    # sort my dict keys before printing them:
    for row in sorted_rows(output):
        print(row)