sorting within csv file

Question

I am currently trying to create a csv file which takes in countries, where their people migrate to, the coordinates of both the original countries and the destinations and the population that actually migrated. I am able to output this information into a file just fine, but I am supposed to sort the output csv from highest to lowest population, which just isn't working. I know that I have to append all the dictionaries into a list, and then sort the population counts through a list, but I just can't seem to figure out how to do the sort itself, and then where to put the code so that it still outputs the right information. In other words, if I put the sort and then the writing step too far out in my for loop I only get the information for zimbabwe, and if I put it farther in the code never finishes... any ideas would be very much appreciated, here is all of my relevant code:

def edges_csv(filename, orgin_column, dest_column, count_column):
    edges_dict = defaultdict(list)
    with open(filename, 'r', newline = '') as input_file:
        # prepare to read the rows of the file using the csv packages' DictReader routines
        edges_reader = csv.DictReader(input_file, delimiter=',', quotechar ='"')

        for row in edges_reader:
            origin = row['Country Origin Name']
            dest = row['Country Dest Name']
            count = row['2000 [2000]']

            tuple = dest, count

            edges_dict[origin].append(tuple)

        for country in edges_dict:
            edge_sort = sorted(edges_dict[country], key=lambda x: x[1], reverse=True)

            edges_dict[country] = edge_sort


    return(edges_dict)

def nodes_csv(filename, country_column, lat_column, long_column):

    with open('locations.csv', 'r', newline = '') as input_file:
        # prepare to read the rows of the file using the csv packages' DictReader routines
        locations_reader = csv.DictReader(input_file, delimiter=',', quotechar ='"')
        graph = defaultdict(list)
        for row in locations_reader:
            country = row['Country Name']
            lat = row['Latitude']
            long = row['Longitude']

            tuple = lat, long

            graph[country].append(tuple)

    return (graph)

def main():

    # open the tab-delimited input data file
    with open('world_bank_country_data.txt', 'r', newline = '') as input_file:
        # prepare to read the rows of the file using the csv packages' DictReader routines
        country_data_reader = csv.DictReader(input_file, delimiter='\t', quotechar ='"')

        with open('edges.csv', 'w', newline='') as output_file_3:

            big_list = []
            row_count = 0

            # origin_dest_count = edges_csv('world_bank_migration.csv',
            #                               'origin', 'dest', 'count')

            lats_longs = nodes_csv('locations.csv',
                               'country', 'latitude', 'longitude')

            migration_outflow_graph = read_directed_graph_from_csv("world_bank_migration.csv",
                                                               "Country Origin Name", "Country Dest Name",
                                                               "2000 [2000]")
            edges_writer = csv.DictWriter(output_file_3,
                                             fieldnames=['start_country', 'end_country', 'start_lat', 'start_long', 'end_lat', 'end_long', 'count'],
                                             extrasaction='ignore',
                                             delimiter=',', quotechar='"')



            for country in migration_outflow_graph:
                edges_dict['start_country'] = country
                tuple = migration_outflow_graph[country]
                loc = lats_longs[country]
                for item in tuple:
                    edges_dict['end_country'] = item[0]
                    edges_dict['count'] = item[1]
                    count_sort = edges_dict['count']
                    end_country = edges_dict['end_country']
                    dest = lats_longs[end_country]

                    for thing in loc:
                        edges_dict['start_lat'] = thing[0]
                        edges_dict['start_long'] = thing[1]

                    for thing in dest:
                        edges_dict['end_lat'] = thing[0]
                        edges_dict['end_long'] = thing[1]
            # big_list = sorted(big_list, key = lambda x: x['count'], reverse = True)
            # print(big_list)
            # for elem in big_list:
            #     row_count = row_count + 1
            #     if row_count <= 1000:
            #         edges_writer.writerow(elem)

                        big_list.append(edges_dict)

not that you cannot do it without it but you should definitely look into `pandas` (http://pandas.pydata.org/) — Ma0, Sep 23 '16 at 08:15
For Pandas check this - http://stackoverflow.com/questions/15559812/sorting-by-specific-column-data-using-csv-in-python — Dinesh Pundkar, Sep 23 '16 at 08:20
More this also - http://stackoverflow.com/questions/2089036/sorting-csv-in-python — Dinesh Pundkar, Sep 23 '16 at 08:20
There is module as csvsort. https://pypi.python.org/pypi/csvsort/1.3 — Dinesh Pundkar, Sep 23 '16 at 08:21

sorting within csv file

0 Answers0