As @josiah Swain said, it's not going to be pretty. For this sort of thing it's more recommended to use JS as it can understand what you have.
Saying that, python is awesome and here is you solution!
#Same imports as before
from bs4 import BeautifulSoup
import re
from urllib.request import Request, urlopen
#And one more
import json
# The code you had
req = Request('http://www.futhead.com/squad-building-challenges/squads/343',
headers={'User-Agent': 'Mozilla/5.0'})
webpage = urlopen(req).read()
soup = BeautifulSoup(webpage,'html.parser')
# Store the script
script = soup.find_all('script')[17]
# Extract the oneline that stores all that JSON
uncleanJson = [line for line in script.text.split('\n')
if line.lstrip().startswith('squad.register_players($.parseJSON') ][0]
# The easiest way to strip away all that yucky JS to get to the JSON
cleanJSON = uncleanJson.lstrip() \
.replace('squad.register_players($.parseJSON(\'', '') \
.replace('\'));','')
# Extract out that useful info
data = [ [p['position'],p['data']['slot_position'],p['data']['slug']]
for p in json.loads(cleanJSON)
if p['player'] is not None]
print('position,slot_position,slug')
for line in data:
print(','.join(line))
The result I get for copying and pasting this into python is:
position,slot_position,slug
ST,ST,paulo-henrique
LM,LM,mugdat-celik
CAM,CAM,soner-aydogdu
RM,RM,petar-grbic
GK,GK,fatih-ozturk
CDM,CDM,eray-ataseven
LB,LB,kadir-keles
CB,CB,caner-osmanpasa
CB,CB,mustafa-yumlu
RM,RM,ioan-adrian-hora
GK,GK,bora-kork
Edit: On reflection this is not the easiest code to read for a beginner. Here is a easier to read version
# ... All that previous code
script = soup.find_all('script')[17]
allScriptLines = script.text.split('\n')
uncleanJson = None
for line in allScriptLines:
# Remove left whitespace (makes it easier to parse)
cleaner_line = line.lstrip()
if cleaner_line.startswith('squad.register_players($.parseJSON'):
uncleanJson = cleaner_line
cleanJSON = uncleanJson.replace('squad.register_players($.parseJSON(\'', '').replace('\'));','')
print('position,slot_position,slug')
for player in json.loads(cleanJSON):
if player['player'] is not None:
print(player['position'],player['data']['slot_position'],player['data']['slug'])