I have about 100 spiders on a server. Every morning all the spiders start scraping and write to their own log files. Sometimes a couple of them give me an error. When a spider fails I have to log in to the server and read its log file, but I would rather receive the relevant log output by email.
I have already set up a dynamic mail sender as follows:
import logging

from django.conf import settings as djsettings
from django.core.mail import send_mail
from scrapy import signals


class FirstBotSpiderMiddleware:

    def __init__(self, stats):
        self.stats = stats

    @classmethod
    def from_crawler(cls, crawler):
        s = cls(crawler.stats)
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        crawler.signals.connect(s.spider_closed, signal=signals.spider_closed)
        return s

    def process_spider_input(self, response, spider):
        return None

    def process_spider_output(self, response, result, spider):
        for i in result:
            yield i

    def process_spider_exception(self, response, exception, spider):
        pass

    def process_start_requests(self, start_requests, spider):
        for r in start_requests:
            yield r

    def spider_opened(self, spider):
        spider.logger.info('Spider opened: %s' % spider.name)

    def spider_closed(self, spider, reason):
        error_count = self.stats.get_value('log_count/ERROR')
        counts = self.stats.get_value('item_scraped_count')
        count_403 = self.stats.get_value('downloader/response_status_count/403')
        count_404 = self.stats.get_value('downloader/response_status_count/404')
        robots_404 = self.stats.get_value('robotstxt/response_status_count/404')
        robots_403 = self.stats.get_value('robotstxt/response_status_count/403')
        duplicate_count = self.stats.get_value('item_dropped_count')
        # I want to read all logs here
        logs = ...  # <-- this is the part I can't figure out
        content = "some stat string"
        self.mailSender(spider.name, content, logs)

    def mailSender(self, spider_name, content, logs):
        send_mail(
            "Scrapy " + spider_name + " done",
            content,
            djsettings.EMAIL_HOST_USER,
            ['xxx@xxx.com'],
        )
I couldn't figure out how to read the spider's error log dynamically at spider_closed in the middleware. Do you have any suggestions?
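The closest I've come is reading the spider's own log file back inside spider_closed. This assumes each spider is launched with its own LOG_FILE setting (that part is an assumption on my side; if logging goes to stderr there is no file to read). A rough sketch of what I mean:

    def spider_closed(self, spider, reason):
        # Assumes the spider was started with a per-spider LOG_FILE setting;
        # spider.settings.get('LOG_FILE') is None when logging goes to stderr.
        log_path = spider.settings.get('LOG_FILE')
        logs = ''
        if log_path:
            with open(log_path, 'r') as f:
                # keep only ERROR lines so the mail stays small
                logs = ''.join(line for line in f if ' ERROR' in line)

But I don't know whether this is reliable (the file may not be fully flushed when spider_closed fires) or whether there is a more idiomatic way to get at the log records from the middleware.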