I have a pandas dataframe that I have extracted from a JSON file for breweries I'm interested in. most of these columns are as nested list of dictionaries. However two columns 'hours' and 'memberships' are being problematic.
I'd like to extract the 'hours' column into 7 columns "Mon_Hours","Tue_hours"...'Sun_Hours'. I have tried and tried to figure this out but these two columns are proving challenging.
Here is a link to the initial data: https://www.coloradobrewerylist.com/wp-json/cbl_api/v1/locations/?location-type%5Bnin%5D=404,405&page_size=1000&page_token=1
and here is my code:
import requests
import re
import pandas as pd
import numpy as np
import csv
import json
from datetime import datetime
### get the data from the Colorado Brewery list
url = "https://www.coloradobrewerylist.com/wp-json/cbl_api/v1/locations/?location-type%5Bnin%5D=404,405&page_size=1000&page_token=1"
payload={}
headers = {}
response = requests.request("GET", url, headers=headers, data=payload)
data=response.json()
### convert results to table
pd.set_option('display.max_columns', None)
brewdf = pd.DataFrame.from_dict(data['results'])
#brewdf
############################################
#### CLEAN UP NESTED LIST-DICT COLUMNS #####
############################################
## cleanup dogs column
dogs = pd.json_normalize(brewdf['dogs'])
dogs2 = dogs.squeeze()
dogsdf = pd.json_normalize(dogs2)
dogsdf = dogsdf.drop(columns =['id','slug'])
dogsdf = dogsdf.rename(columns={'name':'dogs_allowed'})
#dogsdf
## cleanup parking column
parking = pd.json_normalize(brewdf['parking'])
parking = parking.rename(columns = {0:'Parking1',1:'Parking2',2:'Parking3'})
a = pd.json_normalize(parking['Parking1'])
b = pd.json_normalize(parking['Parking2'])
c = pd.json_normalize(parking['Parking3'])
parkcombo = pd.concat([a,b,c],ignore_index=True, axis=1)
parkcombo = parkcombo.rename(columns = {2:'P1',5:'P2',8:'P3'})
parkcombo['parking_type'] = parkcombo['P1'].map(str) + ',' + parkcombo['P2'].map(str) + ',' + parkcombo['P3'].map(str)
parkcombo['parking_type'] = parkcombo['parking_type'].str.replace(",nan",'')
parkdf = parkcombo['parking_type'].to_frame()
#parkdf
## cleanup food type column
food = pd.json_normalize(brewdf['food_type'])
food
food = food.rename(columns = {0:'Food1',1:'Food2',2:'Food3',3:'Food4',4:'Food5',5:'Food6'})
a = pd.json_normalize(food['Food1'])
b = pd.json_normalize(food['Food2'])
c = pd.json_normalize(food['Food3'])
d = pd.json_normalize(food['Food4'])
e = pd.json_normalize(food['Food5'])
f = pd.json_normalize(food['Food6'])
foodcombo = pd.concat([a,b,c,d,e,f],ignore_index=True, axis =1)
foodcombo
foodcombo = foodcombo.rename(columns = {2:'F1',5:'F2',8:'F3',11:'F4',14:'F5',17:'F6'})
foodcombo['food_type'] = foodcombo['F1'].map(str) + ',' + foodcombo['F2'].map(str) + ',' + foodcombo['F3'].map(str) + ',' + foodcombo['F4'].map(str)+ ',' + foodcombo['F5'].map(str) + ',' + foodcombo['F6'].map(str)
foodcombo['food_type'] = foodcombo['food_type'].str.replace(",nan",'')
fooddf = foodcombo['food_type'].to_frame()
#fooddf
## cleanup patio column
patio = pd.json_normalize(brewdf['patio'])
patio = patio.rename(columns = {0:'P1',1:'P2',2:'P3'})
a = pd.json_normalize(patio['P1'])
b = pd.json_normalize(patio['P2'])
c = pd.json_normalize(patio['P3'])
patiocombo = pd.concat([a,b,c],ignore_index=True, axis =1)
patiocombo
patiocombo = patiocombo.rename(columns = {2:'P1',5:'P2',8:'P3'})
patiocombo['patio_type'] = patiocombo['P1'].map(str) + ',' + patiocombo['P2'].map(str) + ',' + patiocombo['P3'].map(str)
patiocombo['patio_type'] = patiocombo['patio_type'].str.replace(",nan",'')
patiodf = patiocombo['patio_type'].to_frame()
#patiodf
## clean visitor type column
visitor = pd.json_normalize(brewdf['visitors'])
visitor
visitor = visitor.rename(columns = {0:'V1',1:'V2',2:'V3'})
a = pd.json_normalize(visitor['V1'])
b = pd.json_normalize(visitor['V2'])
c = pd.json_normalize(visitor['V3'])
visitorcombo = pd.concat([a,b,c],ignore_index=True, axis =1)
visitorcombo
visitorcombo = visitorcombo.rename(columns = {2:'V1',5:'V2',8:'V3'})
visitorcombo['visitor_type'] = visitorcombo['V1'].map(str) + ',' + visitorcombo['V2'].map(str) + ',' + visitorcombo['V3'].map(str)
visitorcombo['visitor_type'] = visitorcombo['visitor_type'].str.replace(",nan",'')
visitordf = visitorcombo['visitor_type'].to_frame()
#visitordf
## clean tour type column
tour = pd.json_normalize(brewdf['tour_type'])
tour
tour = tour.rename(columns = {0:'T1',1:'T2',2:'T3',3:'T4'})
a = pd.json_normalize(tour['T1'])
b = pd.json_normalize(tour['T2'])
c = pd.json_normalize(tour['T3'])
d = pd.json_normalize(tour['T4'])
tourcombo = pd.concat([a,b,c,d],ignore_index=True, axis =1)
tourcombo
tourcombo = tourcombo.rename(columns = {2:'T1',5:'T2',8:'T3',11:'T4'})
tourcombo['tour_type'] = tourcombo['T1'].map(str) + ',' + tourcombo['T2'].map(str) + ',' + tourcombo['T3'].map(str) + ','+ tourcombo['T4'].map(str)
tourcombo['tour_type'] = tourcombo['tour_type'].str.replace(",nan",'')
tourdf = tourcombo['tour_type'].to_frame()
#tourdf
## clean other drinks column
odrink = pd.json_normalize(brewdf['otherdrinks_type'])
odrink
odrink = odrink.rename(columns = {0:'O1',1:'O2',2:'O3',3:'O4',4:'O5',5:'O6',6:'O7',7:'O8',8:'O9'})
a = pd.json_normalize(odrink['O1'])
b = pd.json_normalize(odrink['O2'])
c = pd.json_normalize(odrink['O3'])
d = pd.json_normalize(odrink['O4'])
e = pd.json_normalize(odrink['O5'])
f = pd.json_normalize(odrink['O6'])
g = pd.json_normalize(odrink['O7'])
h = pd.json_normalize(odrink['O8'])
i = pd.json_normalize(odrink['O9'])
odrinkcombo = pd.concat([a,b,c,d,e,f,g,h,i],ignore_index=True, axis =1)
odrinkcombo
odrinkcombo = odrinkcombo.rename(columns = {2:'O1',5:'O2',8:'O3',11:'O4',14:'O5',17:'O6',20:'O7',23:'O8',26:'O9'})
odrinkcombo['odrink_type'] = odrinkcombo['O1'].map(str) + ',' + odrinkcombo['O2'].map(str) + ',' + odrinkcombo['O3'].map(str) + ','+ odrinkcombo['O4'].map(str) + ','+ odrinkcombo['O5'].map(str)+ ','+ odrinkcombo['O6'].map(str)+ ','+ odrinkcombo['O7'].map(str)+','+ odrinkcombo['O8'].map(str)+','+ odrinkcombo['O9'].map(str)
odrinkcombo['odrink_type'] = odrinkcombo['odrink_type'].str.replace(",nan",'')
odrinkdf = odrinkcombo['odrink_type'].to_frame()
#odrinkdf
## clean to-go column
togo = pd.json_normalize(brewdf['togo_type'])
togo
togo = togo.rename(columns = {0:'TG1',1:'TG2',2:'TG3',3:'TG4',4:'TG5'})
a = pd.json_normalize(togo['TG1'])
b = pd.json_normalize(togo['TG2'])
c = pd.json_normalize(togo['TG3'])
d = pd.json_normalize(togo['TG4'])
e = pd.json_normalize(togo['TG5'])
togocombo = pd.concat([a,b,c,d,e],ignore_index=True, axis =1)
togocombo
togocombo = togocombo.rename(columns = {2:'TG1',5:'TG2',8:'TG3',11:'TG4',14:'TG5'})
togocombo['togo_type'] = togocombo['TG1'].map(str) + ',' + togocombo['TG2'].map(str) + ',' + togocombo['TG3'].map(str) + ','+ togocombo['TG4'].map(str) + ','+ togocombo['TG5'].map(str)
togocombo['togo_type'] = togocombo['togo_type'].str.replace(",nan",'')
togodf = togocombo['togo_type'].to_frame()
#togodf
## clean merch column
merch = pd.json_normalize(brewdf['merch_type'])
merch
merch = merch.rename(columns = {0:'M1',1:'M2',2:'M3',3:'M4',4:'M5',5:'M6',6:'M7',7:'M8',8:'M9',9:'M10',10:'M11',11:'M12'})
a = pd.json_normalize(merch['M1'])
b = pd.json_normalize(merch['M2'])
c = pd.json_normalize(merch['M3'])
d = pd.json_normalize(merch['M4'])
e = pd.json_normalize(merch['M5'])
f = pd.json_normalize(merch['M6'])
g = pd.json_normalize(merch['M7'])
h = pd.json_normalize(merch['M8'])
i = pd.json_normalize(merch['M9'])
j = pd.json_normalize(merch['M10'])
k = pd.json_normalize(merch['M11'])
l = pd.json_normalize(merch['M12'])
merchcombo = pd.concat([a,b,c,d,e,f,g,h,i,j,k,l],ignore_index=True, axis =1)
merchcombo
merchcombo = merchcombo.rename(columns = {2:'M1',5:'M2',8:'M3',11:'M4',14:'M5',17:'M6',20:'M7',23:'M8',26:'M9',29:'M10',32:'M11',35:'M12'})
merchcombo['merch_type'] = (merchcombo['M1'].map(str) + ',' + merchcombo['M2'].map(str) + ',' + merchcombo['M3'].map(str) + ','+ merchcombo['M4'].map(str) + ','
+ merchcombo['M5'].map(str) + ',' + merchcombo['M6'].map(str)+ ',' + merchcombo['M7'].map(str) + ',' + merchcombo['M8'].map(str)
+ ',' + merchcombo['M9'].map(str)+ ',' + merchcombo['M10'].map(str)+ ',' + merchcombo['M11'].map(str)+ ',' + merchcombo['M12'].map(str))
merchcombo['merch_type'] = merchcombo['merch_type'].str.replace(",nan",'')
merchdf = merchcombo['merch_type'].to_frame()
#merchdf
### clean description column
brewdf['description'] = brewdf['description'].str.replace(r'<[^<>]*>', '', regex=True)
#brewdf
### replace nan with null
brewdf = brewdf.replace('nan',np.nan)
brewdf = brewdf.replace('None',np.nan)
brewdf
cleanedbrewdf = brewdf.drop(columns = {'food_type','tour_type','otherdrinks_type','articles','merch_type','togo_type','patio','visitors','parking','dogs'})
mergedbrewdf = pd.concat([cleanedbrewdf,dogsdf,parkdf,fooddf,patiodf,
visitordf,tourdf,odrinkdf,togodf,merchdf,],ignore_index=False,axis=1)
mergedbrewdf
### remove non-existing
finalbrewdf = mergedbrewdf.loc[(mergedbrewdf['lon'].notnull())].copy()
finalbrewdf['lon'] = finalbrewdf['lon'].astype(float)
finalbrewdf['lat'] = finalbrewdf['lat'].astype(float)
finalbrewdf
Can someone please point me in the right direction for the hours and memberships columns? Also, is there a more efficient way to look through these different columns? They have different nested list-dict lengths which I thought might prevent me from writing a function.