I need parse file as where this link is given below. http://bit.ly/1x6yzoX
I wrote this fallowing method to parse this file, but unable to read incomplete data of latest year(2014) which empty spaces in table of text file. For now I am skipping the lines which I am unable to read.
Help me getting forward to how to handle this problem?.
LINES_TO_IGNORE = 7
import collections
import csv
def parse_file(data_file):
result_dict = collections.OrderedDict()
if not data_file:
return result_dict
with open(data_file) as f:
reader = csv.reader(f, delimiter="\t")
data = islice(reader, LINES_TO_IGNORE, None, None)
if not data:
return result_dict
# Get file headers
headers = data.next()
headers = headers[0].split()
keys = headers[1:]
for row in data:
values = row[0].split()
if len(headers) == len(values):
year = parse_to_int(values[0])
data_list = [parse_to_float(x) for x in values[1:]]
# Each line becomes a dict (column_header->value)
data_dict = collections.OrderedDict(zip(keys, data_list))
else:
print "Skipping"
# result_dict is dict of dict (year->data_dict)
result_dict[year] = data_dict
return result_dict