Actually, Data is generating from external url which is API calls HTML response as POST
method.
import scrapy
from scrapy.crawler import CrawlerProcess
class TestSpider(scrapy.Spider):
name = 'test'
def start_requests(self):
url = 'https://howlongtobeat.com/search_results?page=1'
payload = "queryString=&t=games&sorthead=popular&sortd=0&plat=&length_type=main&length_min=&length_max=&v=&f=&g=&detail=&randomize=0"
headers = {
"content-type":"application/x-www-form-urlencoded",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36"
}
yield scrapy.Request(url,method='POST', body=payload,headers=headers,callback=self.parse)
def parse(self, response):
cards = response.css('div[class="search_list_details"]')
for card in cards:
game_name = card.css('a[class=text_white]::attr(title)').get()
yield {
"game_name":game_name
}
if __name__ == "__main__":
process =CrawlerProcess()
process.crawl(TestSpider)
process.start()
Output:
{'game_name': 'Elden Ring'}
2022-05-12 13:37:12 [scrapy.core.scraper] DEBUG: Scraped from <200 https://howlongtobeat.com/search_results?page=1>
{'game_name': 'Cyberpunk 2077'}
2022-05-12 13:37:12 [scrapy.core.scraper] DEBUG: Scraped from <200 https://howlongtobeat.com/search_results?page=1>
{'game_name': 'Kirby and the Forgotten Land'}
2022-05-12 13:37:12 [scrapy.core.scraper] DEBUG: Scraped from <200 https://howlongtobeat.com/search_results?page=1>
{'game_name': 'LEGO Star Wars The Skywalker Saga'}
2022-05-12 13:37:12 [scrapy.core.scraper] DEBUG: Scraped from <200 https://howlongtobeat.com/search_results?page=1>
{'game_name': 'Hollow Knight'}
2022-05-12 13:37:12 [scrapy.core.scraper] DEBUG: Scraped from <200 https://howlongtobeat.com/search_results?page=1>
{'game_name': 'Tomb Raider'}
2022-05-12 13:37:12 [scrapy.core.scraper] DEBUG: Scraped from <200 https://howlongtobeat.com/search_results?page=1>
{'game_name': 'Portal 2'}
2022-05-12 13:37:12 [scrapy.core.scraper] DEBUG: Scraped from <200 https://howlongtobeat.com/search_results?page=1>
{'game_name': 'Hades'}
2022-05-12 13:37:12 [scrapy.core.scraper] DEBUG: Scraped from <200 https://howlongtobeat.com/search_results?page=1>
{'game_name': 'The Witcher 3 Wild Hunt'}
2022-05-12 13:37:12 [scrapy.core.scraper] DEBUG: Scraped from <200 https://howlongtobeat.com/search_results?page=1>
{'game_name': 'Red Dead Redemption 2'}
2022-05-12 13:37:12 [scrapy.core.scraper] DEBUG: Scraped from <200 https://howlongtobeat.com/search_results?page=1>
{'game_name': 'BioShock'}
2022-05-12 13:37:12 [scrapy.core.scraper] DEBUG: Scraped from <200 https://howlongtobeat.com/search_results?page=1>
{'game_name': 'Portal'}
2022-05-12 13:37:12 [scrapy.core.scraper] DEBUG: Scraped from <200 https://howlongtobeat.com/search_results?page=1>
{'game_name': 'Horizon Forbidden West'}
2022-05-12 13:37:12 [scrapy.core.scraper] DEBUG: Scraped from <200 https://howlongtobeat.com/search_results?page=1>
{'game_name': 'Trek to Yomi'}
2022-05-12 13:37:12 [scrapy.core.scraper] DEBUG: Scraped from <200 https://howlongtobeat.com/search_results?page=1>
{'game_name': 'Grand Theft Auto V'}
2022-05-12 13:37:12 [scrapy.core.scraper] DEBUG: Scraped from <200 https://howlongtobeat.com/search_results?page=1>
{'game_name': 'God of War'}
2022-05-12 13:37:12 [scrapy.core.scraper] DEBUG: Scraped from <200 https://howlongtobeat.com/search_results?page=1>
{'game_name': 'Marvels Guardians of the Galaxy'}
2022-05-12 13:37:12 [scrapy.core.scraper] DEBUG: Scraped from <200 https://howlongtobeat.com/search_results?page=1>
{'game_name': 'BioShock Infinite'}
2022-05-12 13:37:12 [scrapy.core.scraper] DEBUG: Scraped from <200 https://howlongtobeat.com/search_results?page=1>
{'game_name': 'Pokmon Legends Arceus'}
2022-05-12 13:37:12 [scrapy.core.scraper] DEBUG: Scraped from <200 https://howlongtobeat.com/search_results?page=1>
{'game_name': 'Horizon Zero Dawn Complete Edition'}
2022-05-12 13:37:12 [scrapy.core.engine] INFO: Closing spider (finished)
2022-05-12 13:37:12 [scrapy.statscollectors] INFO: Dumping Scrapy stats:
{'downloader/request_bytes': 490,
'downloader/request_count': 1,
'downloader/request_method_count/POST': 1,
'downloader/response_bytes': 2754,
'downloader/response_count': 1,
'downloader/response_status_count/200': 1,
'elapsed_time_seconds': 1.49537,
'finish_reason': 'finished',
'finish_time': datetime.datetime(2022, 5, 12, 7, 37, 12, 172047),
'httpcompression/response_bytes': 23986,
'httpcompression/response_count': 1,
'item_scraped_count': 20,