Stupid question. I have made my first scraper/crawler. It gives me exactly what i want, but when i write it to csv file, text appears with \n'] brackets. If i try to remove it in any way - it breaks my output in csv file. Although the website is in hebrew, it shouldn't be a problem. Just look at csv that you get. Thanks in advance
import csv
import requests
from bs4 import BeautifulSoup as bs
import io
url='https://www.maariv.co.il/news/politics'
source = requests.get(url).text
soup = bs(source, 'html.parser')
file = io.open('maariv7.csv', 'w', encoding="utf-16")
csv_writer = csv.writer(file, delimiter='|')
csv_writer.writerow(['Headline', 'Summary', 'Text', 'name'])
file.close()
def single_page_scraper(url):
source = requests.get(url).text
soup = bs(source, 'html.parser')
file = io.open('maariv7.csv', 'a', encoding="utf-16")
csv_writer = csv.writer(file, delimiter='|')
for article in soup.find_all(class_='article-title'):
headline = article.h1.text
print (headline,'\n')
for article in soup.find_all(class_='article-description'):
summary = article.h2.text
print(summary,'\n')
text=[]
name=[]
for par in soup.find_all(class_='article-body'):
text.append(par.get_text())
print(text)
politics = io.open('politicians.txt', 'r', encoding="utf-8")
my_list=politics.read().splitlines()
my_file=str(text)
for i in my_list:
if i in my_file:
name.append(i)
name_list = ", ".join(name)
print(name_list,'\n''\n''\n''\n')
csv_writer.writerow([headline, summary, my_file, name_list])
file.close()
for articles in soup.find_all(class_='three-articles-in-row'):
link = articles.a['href']
single_page_scraper(link)