0

I tried to nest items as suggested in how to implement nested item in scrapy? but I got the following error message:

Traceback (most recent call last):


File "/home/x/workspace/Crawler/venv/local/lib/python2.7/site-packages/scrapy/utils/defer.py", line 102, in iter_errback
    yield next(it)
  File "/home/x/workspace/Crawler/venv/local/lib/python2.7/site-packages/scrapy/spidermiddlewares/offsite.py", line 29, in process_spider_output
    for x in result:
  File "/home/x/workspace/Crawler/venv/local/lib/python2.7/site-packages/scrapy/spidermiddlewares/referer.py", line 22, in <genexpr>
    return (_set_referer(r) for r in result or ())
  File "/home/x/workspace/Crawler/venv/local/lib/python2.7/site-packages/scrapy/spidermiddlewares/urllength.py", line 37, in <genexpr>
    return (r for r in result or () if _filter(r))
  File "/home/x/workspace/Crawler/venv/local/lib/python2.7/site-packages/scrapy/spidermiddlewares/depth.py", line 58, in <genexpr>
    return (r for r in result or () if _filter(r))
  File "/home/x/workspace/Crawler/test.py", line 53, in parse
    s1['grandsons'] = [dict(gs1), dict(gs2)]
TypeError: iteration over non-sequence

How can I iterate? I already tried to insert the following into the class Sons.Item:

def __iter__(self):
    return iter(self.grandsons)

But it didn't help. What am I doing wrong here? Thanks. :)

The complete code is:

    from scrapy import Field, cmdline, Spider
import os


class FamilyItem():
    name = Field()
    sons = Field()

    def __setitem__(self, item, value):
        self.__dict__[item] = value


class SonsItem():
    name = Field()
    grandsons = Field()

    def __setitem__(self, item, value):
        self.__dict__[item] = value

    def __iter__(self):
       return iter(self.grandsons)


class GrandsonsItem():
    name = Field()
    age = Field()
    weight = Field()
    sex = Field()

    def __setitem__(self, item, value):
        self.__dict__[item] = value


class xSpider(Spider):
    name = 'xspider'
    start_urls = ['http://www.some_forum.de']


    def parse(self, response):

        gs1 = GrandsonsItem()
        gs1['name'] = 'GS1'
        gs1['age'] = 18
        gs1['weight'] = 50

        gs2 = GrandsonsItem()
        gs2['name'] = 'GS2'
        gs2['age'] = 19
        gs2['weight'] = 51

        s1 = SonsItem()
        s1['name'] = 'S1'
        s1['grandsons'] = [dict(gs1), dict(gs2)]

        jenny = FamilyItem()
        jenny['name'] = 'Jenny'
        jenny['sons'] = [dict(s1)]

        yield {'item': jenny}


def remove(filename):
    try:
        os.remove(filename)
    except OSError:
        pass


def main():
    output_file = "test.json"
    remove(output_file)
    cmdline.execute("scrapy runspider test.py -o test.json".split())


if __name__ == '__main__':
    main()
Community
  • 1
  • 1
Jewlz
  • 9
  • 1

0 Answers0