my output is as follows
0 winner loser
1 winner1
2 loser1
3 winner2
4 loser2
5 winner3
6 loser3
how do I remove the empty cells so that winner and loser values are on the same row? I've tried to locate add new line parameters to pipelines but have no luck. Is there any way to over-ride pipelines to only write if item has a value to the row so the output can be on the same row?
spider.py
import scrapy
from scrapy_splash import SplashRequest
from scrapejs.items import SofascoreItemLoader
from scrapy import Spider
import scrapy
import json
from scrapy.http import Request, FormRequest
class MySpider(scrapy.Spider):
name = "jsscraper"
start_urls = ["https://www.sofascore.com/tennis/2018-02-07"]
def start_requests(self):
for url in self.start_urls:
yield SplashRequest(url=url,
callback=self.parse,
endpoint='render.html',
args={'wait':3.5})
def parse(self, response):
for row in response.css('.event-team'):
il = SofascoreItemLoader(selector=row)
il.add_css('winner' , '.event-team:nth-
child(2)::text')
il.add_css('loser' , '.event-team:nth-
child(1)::text')
yield il.load_item()
pipline.py
from scrapy.exporters import CsvItemExporter
class ScrapejsPipeline(object):
def process_item(self, item, spider):
return item
class CsvPipeline(object):
def __init__(self):
self.file = open("quotedata2.csv", 'w+b')
self.exporter = CsvItemExporter(self.file, str)
self.exporter.start_exporting()
def close_spider(self, spider):
self.exporter.finish_exporting()
self.file.close()
def process_item(self, item, spider):
self.exporter.export_item(item)
return item
items.py
import scrapy
from scrapy.loader import ItemLoader
from scrapy.loader.processors import TakeFirst, MapCompose,
from operator import methodcaller
from scrapy import Spider, Request, Selector
class SofascoreItem(scrapy.Item):
loser = scrapy.Field()
winner = scrapy.Field()
#date = scrapy.Field()
class SofascoreItemLoader(ItemLoader):
default_item_class = SofascoreItem
default_input_processor = MapCompose(methodcaller('strip'))
default_output_processor = TakeFirst()