-2

I am using Scrapy (Python) to crawl data from the internet, but I get this error: unindent does not match any outer indentation level

Here is my code.

FILE: items.py
import scrapy
class ToeicLesson(scrapy.Item):
    """Item describing one lesson scraped from the lesson gallery."""

    # Lesson heading text.
    title = scrapy.Field()
    # Address of the lesson's detail page.
    url = scrapy.Field()
    # List of ToeicWord items belonging to this lesson.
    words = scrapy.Field()
    # URLs of files attached to the lesson (the spider stores the cover image
    # here). NOTE(review): presumably consumed by Scrapy's FilesPipeline,
    # which would also populate ``files`` - confirm against the project settings.
    file_urls = scrapy.Field()
    files = scrapy.Field()
class ToeicWord(scrapy.Item):
    """Item describing a single vocabulary entry inside a lesson."""

    # The word itself.
    vocabulary = scrapy.Field()
    # Spelling / pronunciation text shown next to the word.
    spelling = scrapy.Field()
    # Explanation and meaning of the word.
    explain = scrapy.Field()
    meaning = scrapy.Field()
    # Example sentences; the prefixes suggest English and Vietnamese.
    # NOTE(review): inferred from the field names - confirm.
    en_example = scrapy.Field()
    vi_example = scrapy.Field()
    # URLs of files attached to the word (the spider stores image and audio
    # links here). NOTE(review): presumably consumed by Scrapy's
    # FilesPipeline, which would also populate ``files`` - confirm against
    # the project settings.
    file_urls = scrapy.Field()
    files = scrapy.Field()

FILE: toeic_spider.py
# -*- coding: utf-8 -*-
import scrapy
from scrapy import Spider
from scrapy import Request
from toeic.items import ToeicLesson
from toeic.items import ToeicWord

class ToeicSpider(Spider):
    """Spider that collects vocabulary lessons from 600tuvungtoeic.com.

    ``parse`` reads the lesson gallery on the start page and schedules one
    request per lesson; ``parse_details_lesson`` fills in the lesson's words
    and returns the finished item.
    """

    name = "toeic"
    allowed_domains = ["600tuvungtoeic.com"]
    start_urls = [
        "http://600tuvungtoeic.com",
    ]

    def parse(self, response):
        """Yield one detail-page request for every lesson in the gallery.

        The partially filled ``ToeicLesson`` travels with the request in
        ``meta['lesson']``. It is emitted only by ``parse_details_lesson``,
        once its words are known, so it is not yielded from here as well.
        """
        for item in response.css('div.gallery-item'):
            href = item.css('div.overlay a::attr(href)').extract_first()
            if href is None:
                # Without a link there is no detail page to request.
                continue

            url = "http://600tuvungtoeic.com/" + href

            lesson = ToeicLesson()
            lesson['url'] = url
            lesson['title'] = item.css(
                'div.content-gallery h3::text').extract_first()

            # NOTE(review): unlike the word images in parse_details_lesson,
            # this src is stored without the site prefix - confirm that the
            # page provides an absolute URL here.
            image = item.css('div.image img::attr(src)').extract_first()
            lesson['file_urls'] = [image]

            details_lesson_request = Request(
                url, callback=self.parse_details_lesson)
            details_lesson_request.meta['lesson'] = lesson
            yield details_lesson_request

    def parse_details_lesson(self, response):
        """Collect the words on a lesson page and return the finished lesson.

        Reads the lesson from ``response.meta['lesson']``, stores one
        ``ToeicWord`` per ``div.tuvung`` element in ``lesson['words']`` and
        returns the lesson.
        """
        lesson = response.meta['lesson']
        lesson['words'] = []

        for item in response.css('div.tuvung'):
            word = ToeicWord()

            # The first span holds the word, the second its spelling; either
            # may be absent, in which case the field is set to None.
            spans = item.css('div.noidung span::text').extract()
            word['vocabulary'] = spans[0] if spans else None
            word['spelling'] = spans[1] if len(spans) > 1 else None

            # An Item field must be assigned before it can be read, so the
            # list is created here before any URL is appended to it.
            word['file_urls'] = []

            image = item.css('div.hinhanh img::attr(src)').extract_first()
            if image is not None:
                word['file_urls'].append("http://600tuvungtoeic.com/" + image)

            # TODO: replace these placeholder values with text scraped from
            # the page.
            word['explain'] = "test"
            word['meaning'] = "Du lieu test meaning"
            word['en_example'] = "Du lieu test en_example"
            word['vi_example'] = "Du lieu test vi_example"

            audio = item.css(
                'div.noidung audio source::attr(src)').extract_first()
            if audio is not None:
                word['file_urls'].append("http://600tuvungtoeic.com/" + audio)

            lesson['words'].append(word)

        return lesson

My error: unindent does not match any outer indentation level

Thanks so much!

Pham Hai
  • 55
  • 5
  • If my solution solved your problem then mark it as solution for other people with similar problem – Greg Eremeev Mar 13 '17 at 07:26
  • 1
    Possible duplicate of [IndentationError: unindent does not match any outer indentation level](http://stackoverflow.com/questions/492387/indentationerror-unindent-does-not-match-any-outer-indentation-level) – SiHa Mar 13 '17 at 08:06

1 Answers1

0

My error: unindent does not match any outer indentation level

Your file mixes spaces and tabs for indentation. Find all the tabs and replace them with spaces.

You can check for this by running python -m tabnanny your_file.py

Greg Eremeev
  • 1,760
  • 5
  • 23
  • 33