I tried to nest items as suggested in how to implement nested item in scrapy? but I got the following error message:
Traceback (most recent call last):
File "/home/x/workspace/Crawler/venv/local/lib/python2.7/site-packages/scrapy/utils/defer.py", line 102, in iter_errback
yield next(it)
File "/home/x/workspace/Crawler/venv/local/lib/python2.7/site-packages/scrapy/spidermiddlewares/offsite.py", line 29, in process_spider_output
for x in result:
File "/home/x/workspace/Crawler/venv/local/lib/python2.7/site-packages/scrapy/spidermiddlewares/referer.py", line 22, in <genexpr>
return (_set_referer(r) for r in result or ())
File "/home/x/workspace/Crawler/venv/local/lib/python2.7/site-packages/scrapy/spidermiddlewares/urllength.py", line 37, in <genexpr>
return (r for r in result or () if _filter(r))
File "/home/x/workspace/Crawler/venv/local/lib/python2.7/site-packages/scrapy/spidermiddlewares/depth.py", line 58, in <genexpr>
return (r for r in result or () if _filter(r))
File "/home/x/workspace/Crawler/test.py", line 53, in parse
s1['grandsons'] = [dict(gs1), dict(gs2)]
TypeError: iteration over non-sequence
How can I iterate? I already tried to insert the following into the class Sons.Item:
def __iter__(self):
return iter(self.grandsons)
But it didn't help. What am I doing wrong here? Thanks. :)
The complete code is:
from scrapy import Field, cmdline, Spider
import os
class FamilyItem():
name = Field()
sons = Field()
def __setitem__(self, item, value):
self.__dict__[item] = value
class SonsItem():
name = Field()
grandsons = Field()
def __setitem__(self, item, value):
self.__dict__[item] = value
def __iter__(self):
return iter(self.grandsons)
class GrandsonsItem():
name = Field()
age = Field()
weight = Field()
sex = Field()
def __setitem__(self, item, value):
self.__dict__[item] = value
class xSpider(Spider):
name = 'xspider'
start_urls = ['http://www.some_forum.de']
def parse(self, response):
gs1 = GrandsonsItem()
gs1['name'] = 'GS1'
gs1['age'] = 18
gs1['weight'] = 50
gs2 = GrandsonsItem()
gs2['name'] = 'GS2'
gs2['age'] = 19
gs2['weight'] = 51
s1 = SonsItem()
s1['name'] = 'S1'
s1['grandsons'] = [dict(gs1), dict(gs2)]
jenny = FamilyItem()
jenny['name'] = 'Jenny'
jenny['sons'] = [dict(s1)]
yield {'item': jenny}
def remove(filename):
try:
os.remove(filename)
except OSError:
pass
def main():
output_file = "test.json"
remove(output_file)
cmdline.execute("scrapy runspider test.py -o test.json".split())
if __name__ == '__main__':
main()