I am trying to extract data from an API from my government. The API is divided in pages, and has 10 observations per page. I wrote an algorithm that grabs the important information from each observation and adds it to a pandas dataframe. Everything was going great, until I reach iteration 29, at which I get the error mentioned in the title.
Here's the code I wrote:
#Database Creation Using API
#Import Libraries
import requests
import pandas as pd
#Define a list of relevant variables to automatize information acquisition
relevant_vars = ["year","ocid","date","region","title","description","suppliers","buyer","amount"\
,"budget"]#Define a list of relevant variables to automatize information acquisition
#Creation of empty Pandas Dataframe to save all the pertinent information from the database.
data_collected = pd.DataFrame(columns = relevant_vars)
#Access to API's data
#API number 1: "Búsqueda de procesos de contratación por medio de palabra"
#Need an initial response to start while loop
def firstResponse():
url_t = "https://datosabiertos.compraspublicas.gob.ec/PLATAFORMA/api/search_ocds"
payload = {"year":"2015","page":"2"}
r = requests.get(url_t,params = payload).json()
return r
#Individual information saver.
def infoSave(variables,item):
rp = firstResponse()
temp = []
for i in variables:
i = rp["data"][item][str(i)]
temp.append(i)
return temp
#Information gatherer
def infoGet(yr,url,obs=0):
rp = dict.copy(firstResponse())
observations = 0
page_count = 0
debug_count = 0
#If no observations parameter is set, automatically gather all available data for that year.
#Make all the API calls for the specific year (each page represents a call)
while rp["pages"] - rp["page"] > 1:
page_count = page_count + 1
print(page_count)
url_n = url
payload = {"page":str(page_count),"year":str(yr)}
rp = requests.get(url_n,params=payload).json()
#Now that the call has been made, save this information in many variables.
for item in range(len(rp["data"])):
debug_count = debug_count + 1
print(f"Iteration no.{debug_count}"+str(infoSave(relevant_vars,item)))
year, ocid, date, region, title, description, suppliers, buyer, amount, budget = infoSave(relevant_vars,item)
#After storing the information in the variables, append it to the pandas dataframe
final_dataframe = data_collected.append({"year":year,"ocid":ocid,"date":date,\
"region":region,"title":title,\
"description":description,\
"suppliers":suppliers,"buyer":buyer,\
"amount":amount,"budget":budget},ignore_index \
= True)
observations = observations + 1
if obs == 0:
pass
elif observations == obs:
break
Then I tried to run the infoGet method:
infoGet(2015,"https://datosabiertos.compraspublicas.gob.ec/PLATAFORMA/api/search_ocds",obs=10)
Which runs perfectly until iteration 29, when I receive this error message:
JSONDecodeError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_18876/3797698545.py in <module>
1 #Extract the required information from API
----> 2 infoGet(2015,"https://datosabiertos.compraspublicas.gob.ec/PLATAFORMA/api/search_ocds",obs=10)
~\AppData\Local\Temp/ipykernel_18876/506738801.py in infoGet(yr, url, obs)
47 for item in range(len(rp["data"])):
48 debug_count = debug_count + 1
---> 49 print(f"Iteration no.{debug_count}"+str(infoSave(relevant_vars,item)))
50 year, ocid, date, region, title, description, suppliers, buyer, amount, budget = infoSave(relevant_vars,item)
51 #After storing the information in the variables, append it to the pandas dataframe
~\AppData\Local\Temp/ipykernel_18876/506738801.py in infoSave(variables, item)
23 #Individual information saver.
24 def infoSave(variables,item):
---> 25 rp = firstResponse()
26 temp = []
27 for i in variables:
~\AppData\Local\Temp/ipykernel_18876/506738801.py in firstResponse()
18 url_t = "https://datosabiertos.compraspublicas.gob.ec/PLATAFORMA/api/search_ocds"
19 payload = {"year":"2015","page":"2"}
---> 20 r = requests.get(url_t,params = payload).json()
21 return r
22
D:\ProgramData\lib\site-packages\requests\models.py in json(self, **kwargs)
908 # used.
909 pass
--> 910 return complexjson.loads(self.text, **kwargs)
911
912 @property
D:\ProgramData\lib\json\__init__.py in loads(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
344 parse_int is None and parse_float is None and
345 parse_constant is None and object_pairs_hook is None and not kw):
--> 346 return _default_decoder.decode(s)
347 if cls is None:
348 cls = JSONDecoder
D:\ProgramData\lib\json\decoder.py in decode(self, s, _w)
335
336 """
--> 337 obj, end = self.raw_decode(s, idx=_w(s, 0).end())
338 end = _w(s, end).end()
339 if end != len(s):
D:\ProgramData\lib\json\decoder.py in raw_decode(self, s, idx)
353 obj, end = self.scan_once(s, idx)
354 except StopIteration as err:
--> 355 raise JSONDecodeError("Expecting value", s, err.value) from None
356 return obj, end
JSONDecodeError: Expecting value: line 1 column 1 (char 0)
I would be really grateful if somebody could shed some light on why I'm getting this error code specifically when I arrive at this observation. I tried only getting that observation and it works just fine: It has the exact same amount of data as the other observations, and is exactly the same kind of object.
Thank you for your help!