This isn't a good application for regex. Instead, you should deserialize it using the json
library, and find any dict keys named "author" in the resulting object. This is easy to do using a recursive function:
def find_authors(obj):
    """Recursively collect the value of every "author" key in decoded JSON.

    Args:
        obj: A value as produced by ``json.loads`` (nested dicts, lists,
            and scalars).

    Returns:
        A list holding the value stored under each dict key named
        "author", in traversal order. A value found under "author" is
        appended as-is and is not searched any further. Anything that is
        neither a dict nor a list yields an empty list.
    """
    authors = []
    if isinstance(obj, dict):
        for key, value in obj.items():
            if key == "author":
                # Keep the value stored under "author" (not the key itself).
                authors.append(value)
            elif isinstance(value, (list, dict)):
                # Only containers can hold further "author" keys.
                authors.extend(find_authors(value))
    elif isinstance(obj, list):
        for elem in obj:
            # Scalars inside the list simply contribute an empty result.
            authors.extend(find_authors(elem))
    return authors
import urllib.request
import json
# Fetch the public events feed; the context manager closes the HTTP
# response once the body has been read, instead of leaking the connection.
with urllib.request.urlopen("https://api.github.com/users/rotki/events/public") as r:
    txt = r.read()
# json.loads accepts the raw bytes returned by read().
jobj = json.loads(txt)
# In an interactive session this expression displays the resulting list.
find_authors(jobj)
This gives a list containing all "author"
entries in the JSON. Note that this is an actual Python list containing dictionaries, not a JSON string.
[{'email': 'lefteris@refu.co', 'name': 'Lefteris Karapetsas'},
{'email': 'lefteris@refu.co', 'name': 'Lefteris Karapetsas'},
{'email': 'lefteris@refu.co', 'name': 'Lefteris Karapetsas'},
{'email': 'lefteris@refu.co', 'name': 'Lefteris Karapetsas'},
{'email': 'lefteris@refu.co', 'name': 'Lefteris Karapetsas'},
{'email': 'lefteris@refu.co', 'name': 'Lefteris Karapetsas'},
{'email': 'lefteris@refu.co', 'name': 'Lefteris Karapetsas'},
{'email': 'lefteris@refu.co', 'name': 'Lefteris Karapetsas'},
{'email': 'lefteris@refu.co', 'name': 'Lefteris Karapetsas'}]