I have a this JSON that contains information from many "videos". Within every single "video" in the JSON is another link to a new JSON that contains "messages".
I am trying to iterate through the "message" JSON links and insert them to a MongoDB database.
The problem is that I get a JSONDecodeError. What am I doing wrong and how do I make it right?
Traceback (most recent call last):
File "/import_messages_dev.py", line 35, in raw_messages_data = requests.get(url3).json()
File "venv1/lib/python3.6/site-packages/requests/models.py", line 892, in json return complexjson.loads(self.text, **kwargs)
File "/usr/lib/python3.6/json/init.py", line 354, in loads return _default_decoder.decode(s)
File "/usr/lib/python3.6/json/decoder.py", line 342, in decode raise JSONDecodeError("Extra data", s, end) json.decoder.JSONDecodeError: Extra data: line 2 column 1 (char 380)
import urllib.parse
import requests
import pymongo
### DATABASE ####
# Connect to database // login user:password
uri = 'mongodb://testuser:password@ds245687.mlab.com:45687/liveme'
# Set client.
client = pymongo.MongoClient(uri)
# Set database.
db = client.get_database()
# Create collection.
messages = db['messages']
# The url to the live.me replays.
replay_url = "http://live.ksmobile.net/live/getreplayvideos?"
userid = 895324164037541888
# Parsing the urls for replays and profile with the userid.
url2 = replay_url + urllib.parse.urlencode({'userid': userid}) + '&page_size=1000'
# Printing urls for own validation.
print(f"Replay url: {url2}\n")
# Pull the data from replay json.
raw_replay_data = requests.get(url2).json()
print("Message links: ")
# Insert messages to database.
for i in raw_replay_data['data']['video_info']:
url3 = i['msgfile']
raw_messages_data = requests.get(url3).json()
messages.insert_many(raw_messages_data)
print(url3)
client.close()
Update for further help to the answer
So to iterate and get all the links, read it line by line and parse it as JSON and insert it to database I'm trying to do it like this, but it create a new error.
for i in raw_replay_data['data']['video_info']:
url3 = i['msgfile']
raw_message_data = urllib.request.urlopen(url3)
for line in raw_message_data:
json_data = json.loads(line)
messages.insert_many(json_data)
The new error is:
Traceback (most recent call last):
File "/import_messages_dev.py", line 54, in <module>
raw_message_data = urllib.request.urlopen(url3)
File "/usr/lib/python3.6/urllib/request.py", line 223, in urlopen
return opener.open(url, data, timeout)
File "/usr/lib/python3.6/urllib/request.py", line 532, in open
response = meth(req, response)
File "/usr/lib/python3.6/urllib/request.py", line 642, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python3.6/urllib/request.py", line 570, in error
return self._call_chain(*args)
File "/usr/lib/python3.6/urllib/request.py", line 504, in _call_chain
result = func(*args)
File "/usr/lib/python3.6/urllib/request.py", line 650, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 403: Forbidden