I'm scraping weather data from timeanddate.com using the following code:
import requests, re, typing
from bs4 import BeautifulSoup as soup
import contextlib
import pandas as pd
def _remove(d:list) -> list:
return list(filter(None, [re.sub('\xa0', '', b) for b in d]))
@contextlib.contextmanager
def get_weather_data(url:str, by_url = True, timeout: float = 30) -> typing.Generator[dict, None, None]:
    """Parse the ``wt-his`` table of a page and yield it as a dictionary.

    Args:
        url: Address of the page to download, or the HTML markup itself when
            ``by_url`` is false.
        by_url: If true, ``url`` is fetched with ``requests``; otherwise
            ``url`` is parsed directly as HTML.
        timeout: Seconds to wait for the HTTP response before giving up.

    Yields:
        A single-entry dict. The key is the tuple of cleaned cells from the
        first header row; the value is a list with one dict per data row,
        mapping the cleaned second-header-row names to the row's cleaned
        cell texts.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the page has no ``<table id="wt-his">``, or the table
            does not have the expected header/data/footer row layout.
    """
    if by_url:
        # Without a timeout requests may block forever on a stalled server.
        response = requests.get(url, timeout=timeout)
        # Fail here rather than trying to parse an error page.
        response.raise_for_status()
        markup = response.text
    else:
        markup = url

    d = soup(markup, 'html.parser')
    _table = d.find('table', {'id': 'wt-his'})
    if _table is None:
        raise ValueError("no table with id 'wt-his' found in the page")

    # One entry per <tr>: the list of its <th> texts followed by each <td> text.
    _data = [
        [[i.text for i in c.find_all('th')], *[i.text for i in c.find_all('td')]]
        for c in _table.find_all('tr')
    ]
    # The first two rows contain only header cells; the last row is discarded.
    [h1], [h2], *data, _ = _data
    _h2 = _remove(h2)
    # NOTE(review): _remove drops empty cells from both the headers and every
    # row, so a row with a genuinely empty value would pair values with the
    # wrong header names -- confirm against the real page.
    yield {tuple(_remove(h1)): [dict(zip(_h2, _remove([a, *i]))) for [[a], *i] in data]}
# Page holding the historic weather table to scrape.
city = 'https://www.timeanddate.com/weather/germany/berlin/historic?hd=20210905'

with get_weather_data(city) as weather:
    # `weather` maps the header tuple to a list of per-timestamp dicts, so
    # each cell of the resulting frame holds one dict.
    df = pd.DataFrame.from_dict(weather)

# Series with one dict per timestamp.
df = df['Conditions']['Comfort']

# Expand each dict into its own columns: the dict keys become the column
# names and every timestamp becomes one row.
expanded = pd.DataFrame(df.tolist(), index=df.index)

# Keep only the columns of interest; add further column names (e.g. the
# humidity column, if present in `expanded.columns`) to this list as needed.
df0 = expanded[['Time', 'Temp', 'Wind']]
I want to convert the resulting table into a pandas DataFrame in which each column holds the values of one quantity (temperature, wind speed, etc.) and each row corresponds to one timestamp. At the moment, all the data for a timestamp is stored as a dictionary in a single cell, so I tried the suggestions in this post, but nothing has worked so far. Is there a simple, Pythonic way to expand the dictionary in each DataFrame row into separate columns?