My code below gets the street address for each gym, but there is an error in the spacing of the output for the hours that the gym is open. Any ideas of where I went wrong?
import urlparse
from bs4 import BeautifulSoup
from bs4 import Tag
import requests
import time
import csv
sitemap = 'https://www.planetfitness.com/sitemap'
sitemap_content = requests.get(sitemap).content
soup = BeautifulSoup(sitemap_content, 'html.parser')
atags = soup.select('td[class~=club-title] > a[href^="/gyms"]')
links = [atag.get('href') for atag in atags]
with open('gyms.csv', 'w') as gf:
gymwriter = csv.writer(gf)
for link in links:
gymurl = urlparse.urljoin(sitemap, link)
sitemap_content = requests.get(gymurl).content
soup = BeautifulSoup(sitemap_content, 'html.parser')
gymrow = [ gymurl ]
address_line1 = soup.select('p[class~=address] > span[class~=address-line1]')
gymrow.append(address_line1[0].text)
locality = soup.select('p[class~=address] > span[class~=locality]')
gymrow.append(locality[0].text)
administrative_area = soup.select('p[class~=address] > span[class~=administrative-area]')
gymrow.append(administrative_area[0].text)
postal_code = soup.select('p[class~=address] > span[class~=postal-code]')
gymrow.append(postal_code[0].text)
country = soup.select('p[class~=address] > span[class~=country]')
gymrow.append(country[0].text)
strongs = soup.select('div > strong')
for strong in strongs:
if strong.text == 'Club Hours':
for sibling in strong.next_siblings:
if isinstance(sibling, Tag):
hours = sibling.text
gymrow.append(hours)
break
print(gymrow)
gymwriter.writerow(gymrow)
time.sleep(3)
Thank you for your help!