1

I pulling JSON formatted data from api and the source JSON format looks like this:

Source Sample:

{
"hasMore": false,
"data": [
    {
        "id": "ne0OyNrH0u1X7zHSRcxItg",
        "name": "Start Here",
        "createdDate": "2021-01-06T18:59:54.923000Z",
        "createdById": "kIU0GuRk43lLi8pq16VIgQ",
        "updatedDate": "2021-01-25T19:41:21.231510Z",
        "updatedById": "utcXmeXjBb7vyAzMClxD5g",
        "status": "Completed",
        "templateId": "npt5sit7R6AqrrXNlh9FMQ",
        "completedDate": "2021-01-25T19:41:21.374000Z",
        "completedById": "utcXmeXjBb7vyAzMClxD5g",
        "dueDate": null
    },
    {
        "id": "soYwhmamfpFPv1kWesZKJg",
        "name": "Test Well No. 3D",
        "createdDate": "2021-02-09T20:27:04.307000Z",
        "createdById": "i6Doi4vke7SefvGbyGpN3A",
        "updatedDate": "2021-02-09T20:27:04.307000Z",
        "updatedById": "i6Doi4vke7SefvGbyGpN3A",
        "status": "Active",
        "templateId": "nDEwm4HENbpWfmpotstPKw",
        "completedDate": null,
        "completedById": null,
        "dueDate": null
    }
],
"nextPageId": "soYwhmamfpFPv1kWesZKJg",
"nextPageUpdatedDate": "2021-02-09T20:27:04.307000Z"
}

Target Sample:

id,name,createdDate,createdById,updatedDate,updatedById,status,templateId,completedDate,completedById,dueDate
"ne0OyNrH0u1X7zHSRcxItg","Start Here","2021-01-06T18:59:54.923000Z","kIU0GuRk43lLi8pq16VIgQ","2021-01-25T19:41:21.231510Z","utcXmeXjBb7vyAzMClxD5g","Completed","npt5sit7R6AqrrXNlh9FMQ","2021-01-25T19:41:21.374000Z","utcXmeXjBb7vyAzMClxD5g",
"udvF3_5fRO1wsPA-4XBDuw","Test Well No. 1D","2021-01-25T14:36:34.270000Z","i6Doi4vke7SefvGbyGpN3A","2021-02-03T15:00:48.923031Z","i6Doi4vke7SefvGbyGpN3A","Active","nDEwm4HENbpWfmpotstPKw",,,
json_data = response.json()['data']
for each_rec in json_data:
        if count == 0: 
                    # Writing headers of CSV file 
            header = each_rec.keys() 
            csv_writer.writerow(header)
            count += 1
        else:
            csv_writer.writerow(each_rec.values())
response.close

how do I include the "hasMore","nextPageId","nextPageUpdatedDate" to each row being inserted to a file?

Thanks

Barbaros Özhan
  • 59,113
  • 10
  • 31
  • 55
kal
  • 11
  • 3
  • Store response like this `resp = response.json()`. Then, `has_more = resp['hasMore']`. Your data will be `json_data = resp['data']`. Similarly, you store next page id and next page updated date and write to the file. Headers will be come `header = ... + ', hasKeys, nextPageID...`. When writing data, append has_more, next_page_id, next_page_update_date and you should be good to go. – zedfoxus Feb 24 '21 at 19:55
  • its not letting me concat the values since these are dict values.the below code I tried and its changing the structure . #csv_writer.writerow(str(hasMore) + ',' + str(each_rec.values()) + nextPageId + ',' + nextPageUpdatedDate) – kal Feb 24 '21 at 20:05

1 Answers1

0

You can try something like this:

resp = response.json()

has_more = resp['hasMore']
json_data = resp['data']
next_page_id = resp['nextPageId']
next_page_update_date = resp['nextPageUpdatedDate']

csv_file = open('./test100.csv', 'w')
csv_writer = csv.writer(csv_file, delimiter=',')

count = 0

for each_rec in json_data:
        if count == 0:

            # get the headers but convert to a list
            header = list(each_rec.keys())
            # add 3 more fields to the list
            header += ['hasMore', 'nextPageId', 'nextPageUpdatedDate']
            csv_writer.writerow(header)
            count += 1
        else:
            # convert values to a list and add 3 more values
            values = list(each_rec.values())
            values += [has_more, next_page_id, next_page_update_date]
            csv_writer.writerow(values)

For those who want to simulate the JSON, use this code with simulated data:

import csv
import json
import sys

text ='''{
"hasMore": false,
"data": [
    {
        "id": "ne0OyNrH0u1X7zHSRcxItg",
        "name": "Start Here",
        "createdDate": "2021-01-06T18:59:54.923000Z",
        "createdById": "kIU0GuRk43lLi8pq16VIgQ",
        "updatedDate": "2021-01-25T19:41:21.231510Z",
        "updatedById": "utcXmeXjBb7vyAzMClxD5g",
        "status": "Completed",
        "templateId": "npt5sit7R6AqrrXNlh9FMQ",
        "completedDate": "2021-01-25T19:41:21.374000Z",
        "completedById": "utcXmeXjBb7vyAzMClxD5g",
        "dueDate": null
    },
    {
        "id": "soYwhmamfpFPv1kWesZKJg",
        "name": "Test Well No. 3D",
        "createdDate": "2021-02-09T20:27:04.307000Z",
        "createdById": "i6Doi4vke7SefvGbyGpN3A",
        "updatedDate": "2021-02-09T20:27:04.307000Z",
        "updatedById": "i6Doi4vke7SefvGbyGpN3A",
        "status": "Active",
        "templateId": "nDEwm4HENbpWfmpotstPKw",
        "completedDate": null,
        "completedById": null,
        "dueDate": null
    }
],
"nextPageId": "soYwhmamfpFPv1kWesZKJg",
"nextPageUpdatedDate": "2021-02-09T20:27:04.307000Z"
}'''

resp = json.loads(text)

has_more = resp['hasMore']
json_data = resp['data']
next_page_id = resp['nextPageId']
next_page_update_date = resp['nextPageUpdatedDate']

csv_file = open('./test100.csv', 'w')
csv_writer = csv.writer(csv_file, delimiter=',')
count = 0

for each_rec in json_data:
        if count == 0:
            header = list(each_rec.keys())
            header += ['hasMore', 'nextPageId', 'nextPageUpdatedDate']
            csv_writer.writerow(header)
            count += 1
        else:
            values = list(each_rec.values())
            values += [has_more, next_page_id, next_page_update_date]
            csv_writer.writerow(values)
zedfoxus
  • 35,121
  • 5
  • 64
  • 63