I am scraping a page written in Danish and am having trouble with the output: it contains many garbled special characters such as (Ã¥, Ã, Ã¥, æ)
instead of the Danish letters that appear on the page itself.
How can I scrape the text so that it looks exactly as it does on the page?
Example link: https://novaindex.com/dk/leverandoerer/mode-og-tekstiler/arbejdstoej
import scrapy
class MainSpider(scrapy.Spider):
    """Spider that yields one record per company link on the start page.

    Each yielded dict has the keys "Name", "Street Address", "City Address"
    and "Phone", taken from the matching ``<span>`` inside every
    ``<a class="companyresult ">`` element.
    """

    name = 'main'
    start_urls = ['https://novaindex.com/dk/leverandoerer/mode-og-tekstiler/arbejdstoej']

    # Export feeds as real UTF-8 text rather than ASCII-escaped sequences, so
    # non-ASCII letters are written as-is.
    # NOTE(review): if a CSV feed still looks garbled when opened in a
    # spreadsheet application, try 'utf-8-sig' here -- confirm with the
    # viewer actually used.
    custom_settings = {
        'FEED_EXPORT_ENCODING': 'utf-8',
    }

    def parse(self, response):
        """Yield a dict of name, street, city and phone for each company.

        Args:
            response: the downloaded listing page.

        Yields:
            dict: one record per ``companyresult`` anchor; a field is an
            empty string when its span is missing (``normalize-space`` of an
            empty node-set).
        """
        # The reported garbling is the signature of UTF-8 bytes decoded with
        # a single-byte codec, so decode the body explicitly as UTF-8 instead
        # of relying on auto-detection.
        # NOTE(review): assumes the site serves UTF-8 -- confirm by logging
        # response.encoding before this line.
        response = response.replace(encoding='utf-8')

        # The trailing space in the class value is part of the page markup.
        for company in response.xpath('//a[@class="companyresult "]'):
            yield {
                "Name": self._span_text(company, "name"),
                "Street Address": self._span_text(company, "street"),
                "City Address": self._span_text(company, "city"),
                "Phone": self._span_text(company, "phone"),
            }

    @staticmethod
    def _span_text(node, css_class):
        """Return the whitespace-normalized text of the span with *css_class*."""
        query = f'normalize-space(.//span[@class="{css_class}"]/text())'
        return node.xpath(query).get()