I'm using scrapy to get data and I want to use flask web framework to show the results in webpage. But I don't know how to call the spiders in the flask app. I've tried to use CrawlerProcess
to call my spiders,but I got the error like this :
ValueError
ValueError: signal only works in main thread
Traceback (most recent call last)
File "/Library/Python/2.7/site-packages/flask/app.py", line 1836, in __call__
return self.wsgi_app(environ, start_response)
File "/Library/Python/2.7/site-packages/flask/app.py", line 1820, in wsgi_app
response = self.make_response(self.handle_exception(e))
File "/Library/Python/2.7/site-packages/flask/app.py", line 1403, in handle_exception
reraise(exc_type, exc_value, tb)
File "/Library/Python/2.7/site-packages/flask/app.py", line 1817, in wsgi_app
response = self.full_dispatch_request()
File "/Library/Python/2.7/site-packages/flask/app.py", line 1477, in full_dispatch_request
rv = self.handle_user_exception(e)
File "/Library/Python/2.7/site-packages/flask/app.py", line 1381, in handle_user_exception
reraise(exc_type, exc_value, tb)
File "/Library/Python/2.7/site-packages/flask/app.py", line 1475, in full_dispatch_request
rv = self.dispatch_request()
File "/Library/Python/2.7/site-packages/flask/app.py", line 1461, in dispatch_request
return self.view_functions[rule.endpoint](**req.view_args)
File "/Users/Rabbit/PycharmProjects/Flask_template/FlaskTemplate.py", line 102, in index
process = CrawlerProcess()
File "/Library/Python/2.7/site-packages/scrapy/crawler.py", line 210, in __init__
install_shutdown_handlers(self._signal_shutdown)
File "/Library/Python/2.7/site-packages/scrapy/utils/ossignal.py", line 21, in install_shutdown_handlers
reactor._handleSignals()
File "/Library/Python/2.7/site-packages/twisted/internet/posixbase.py", line 295, in _handleSignals
_SignalReactorMixin._handleSignals(self)
File "/Library/Python/2.7/site-packages/twisted/internet/base.py", line 1154, in _handleSignals
signal.signal(signal.SIGINT, self.sigInt)
ValueError: signal only works in main thread
My scrapy code like this:
class EPGD(Item):
genID = Field()
genID_url = Field()
taxID = Field()
taxID_url = Field()
familyID = Field()
familyID_url = Field()
chromosome = Field()
symbol = Field()
description = Field()
class EPGD_spider(Spider):
name = "EPGD"
allowed_domains = ["epgd.biosino.org"]
term = "man"
start_urls = ["http://epgd.biosino.org/EPGD/search/textsearch.jsp?textquery="+term+"&submit=Feeling+Lucky"]
db = DB_Con()
collection = db.getcollection(name, term)
def parse(self, response):
sel = Selector(response)
sites = sel.xpath('//tr[@class="odd"]|//tr[@class="even"]')
url_list = []
base_url = "http://epgd.biosino.org/EPGD"
for site in sites:
item = EPGD()
item['genID'] = map(unicode.strip, site.xpath('td[1]/a/text()').extract())
item['genID_url'] = base_url+map(unicode.strip, site.xpath('td[1]/a/@href').extract())[0][2:]
item['taxID'] = map(unicode.strip, site.xpath('td[2]/a/text()').extract())
item['taxID_url'] = map(unicode.strip, site.xpath('td[2]/a/@href').extract())
item['familyID'] = map(unicode.strip, site.xpath('td[3]/a/text()').extract())
item['familyID_url'] = base_url+map(unicode.strip, site.xpath('td[3]/a/@href').extract())[0][2:]
item['chromosome'] = map(unicode.strip, site.xpath('td[4]/text()').extract())
item['symbol'] = map(unicode.strip, site.xpath('td[5]/text()').extract())
item['description'] = map(unicode.strip, site.xpath('td[6]/text()').extract())
self.collection.update({"genID":item['genID']}, dict(item), upsert=True)
yield item
sel_tmp = Selector(response)
link = sel_tmp.xpath('//span[@id="quickPage"]')
for site in link:
url_list.append(site.xpath('a/@href').extract())
for i in range(len(url_list[0])):
if cmp(url_list[0][i], "#") == 0:
if i+1 < len(url_list[0]):
print url_list[0][i+1]
actual_url = "http://epgd.biosino.org/EPGD/search/" + url_list[0][i+1]
yield Request(actual_url, callback=self.parse)
break
else:
print "The index is out of range!"
My flask code like this:
@app.route('/', methods=['GET', 'POST'])
def index():
process = CrawlerProcess()
process.crawl(EPGD_spider)
return redirect(url_for('details'))
@app.route('/details', methods = ['GET'])
def epgd():
if request.method == 'GET':
results = db['EPGD_test'].find()
json_results= []
for result in results:
json_results.append(result)
return toJson(json_results)
How can I call my scrapy spiders when using flask web framework?