I am using Scrapy (Python) to crawl data from a website, but I get this error: IndentationError: unindent does not match any outer indentation level
Here is my code.
FILE: items.py
import scrapy
class ToeicLesson(scrapy.Item):
    """Item describing one lesson scraped from the lesson gallery page."""

    # Lesson heading text.
    title = scrapy.Field()
    # URL of the lesson's detail page.
    url = scrapy.Field()
    # List of ToeicWord items collected from the lesson's detail page.
    words = scrapy.Field()
    # URLs of files to download for this lesson (the lesson image).
    file_urls = scrapy.Field()
    # Never set by the spider; presumably filled in by Scrapy's FilesPipeline
    # with the download results for `file_urls` -- confirm the pipeline is
    # enabled in the project settings.
    files = scrapy.Field()
class ToeicWord(scrapy.Item):
    """Item describing one vocabulary entry scraped from a lesson page."""

    # The vocabulary word itself.
    vocabulary = scrapy.Field()
    # Text of the second span in the word's content block; presumably the
    # phonetic spelling -- confirm against the page markup.
    spelling = scrapy.Field()
    # Explanation, meaning and example sentences (English / Vietnamese).
    explain = scrapy.Field()
    meaning = scrapy.Field()
    en_example = scrapy.Field()
    vi_example = scrapy.Field()
    # URLs of files to download for this word (its image and audio clip).
    file_urls = scrapy.Field()
    # Never set by the spider; presumably filled in by Scrapy's FilesPipeline
    # with the download results for `file_urls` -- confirm the pipeline is
    # enabled in the project settings.
    files = scrapy.Field()
FILE: toeic_spider.py
# -*- coding: utf-8 -*-
import scrapy
from scrapy import Spider
from scrapy import Request
from toeic.items import ToeicLesson
from toeic.items import ToeicWord
class ToeicSpider(Spider):
    """Crawl 600tuvungtoeic.com and emit one ToeicLesson per lesson.

    ``parse`` reads the lesson gallery on the start page and schedules a
    request for every lesson's detail page; ``parse_details_lesson`` attaches
    the lesson's words and returns the finished item.
    """

    name = "toeic"
    allowed_domains = ["600tuvungtoeic.com"]
    start_urls = [
        "http://600tuvungtoeic.com",
    ]

    def parse(self, response):
        """Yield one detail-page Request per lesson found in the gallery.

        The partially filled ToeicLesson travels with the request in
        ``meta['lesson']``. It is deliberately NOT yielded here: yielding it
        both here and from the callback would emit every lesson twice, the
        first time without its words.
        """
        for item in response.css('div.gallery-item'):
            href = item.css('div.overlay a::attr(href)').extract_first()
            if href is None:
                # Without a link there is no detail page to crawl.
                self.logger.warning(
                    "Skipping gallery item without a lesson link on %s",
                    response.url,
                )
                continue

            # urljoin resolves relative links against the page URL and leaves
            # absolute ones untouched, so no manual prefixing is needed.
            url = response.urljoin(href)

            lesson = ToeicLesson()
            lesson['url'] = url
            lesson['title'] = item.css(
                'div.content-gallery h3::text').extract_first()

            image = item.css('div.image img::attr(src)').extract_first()
            # Only store real, absolute URLs in file_urls.
            lesson['file_urls'] = [response.urljoin(image)] if image else []

            yield Request(
                url,
                callback=self.parse_details_lesson,
                meta={'lesson': lesson},
            )

    def parse_details_lesson(self, response):
        """Collect the words of one lesson and return the completed lesson.

        Reads the ToeicLesson from ``response.meta['lesson']``, fills its
        ``words`` list with one ToeicWord per ``div.tuvung`` element and
        returns the lesson item.
        """
        lesson = response.meta['lesson']
        lesson['words'] = []

        for item in response.css('div.tuvung'):
            word = ToeicWord()

            # One query serves both fields: the first span text is the word,
            # the second its spelling. Missing spans become None instead of
            # raising IndexError.
            spans = item.css('div.noidung span::text').extract()
            word['vocabulary'] = spans[0] if spans else None
            word['spelling'] = spans[1] if len(spans) > 1 else None

            # Item fields have no default value; the list must exist before
            # anything is appended to it (otherwise KeyError).
            word['file_urls'] = []

            image = item.css('div.hinhanh img::attr(src)').extract_first()
            if image:
                word['file_urls'].append(response.urljoin(image))

            # TODO: the four values below are placeholders; replace them with
            # data scraped from the page.
            word['explain'] = "test"
            word['meaning'] = "Du lieu test meaning"
            word['en_example'] = "Du lieu test en_example"
            word['vi_example'] = "Du lieu test vi_example"

            audio = item.css(
                'div.noidung audio source::attr(src)').extract_first()
            if audio:
                word['file_urls'].append(response.urljoin(audio))

            lesson['words'].append(word)

        return lesson
The error I get: IndentationError: unindent does not match any outer indentation level
Thank you so much!