I developed a web scraper that goes through the profiles of a Facebook-like website (Lang-8) and saves the required data. However, I don't know how to build a resume mechanism: if the PC crashes, the script should pick up from the last profile it scanned instead of starting over from profile 1.
import requests
from bs4 import BeautifulSoup

def file_create(linked):
    # Download one journal entry and save its body text to a file named
    # after the last three characters of the URL plus the correction count.
    source_code = requests.get(linked)
    plain_text = source_code.text
    soup = BeautifulSoup(plain_text, features="html.parser")
    for text in soup.find_all('li', {'class': 'corrections_num'}):
        corrections = text.text
    for content in soup.find_all('div', {'id': 'body_show_ori'}):
        text1 = content.text
    fout = open(linked[-1] + linked[-2] + linked[-3] + "_" + corrections + "_.txt",
                'w', encoding='utf-8')
    fout.write(text1)
    fout.close()

def open_article(url2):
    # Walk the first four pages of a user's journal list and save every entry.
    in_page = 1
    while in_page < 5:
        source_code = requests.get(url2 + "?page=" + str(in_page))
        plain_text = source_code.text
        soup = BeautifulSoup(plain_text, features="html.parser")
        for link in soup.find_all('h3', {'class': 'journal_title'}):
            href1 = str(link.find('a').get("href"))
            file_create(href1)
        in_page += 1

max_profiles = 1000  # placeholder: however many numeric profile IDs to scan

profile = 1
while profile <= max_profiles:
    url = "http://lang-8.com/" + str(profile)
    source_code = requests.get(url)
    plain_text = source_code.text
    soup = BeautifulSoup(plain_text, features="html.parser")
    for lang in soup.find_all('dd', {'class': 'studying_lang_name'}):
        lang1 = str(lang.string)
        if lang1 == "\n\nPolish\n":  # only keep profiles studying Polish
            journal = url + "/journals"
            open_article(journal)
    profile += 1
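
For reference, here is a minimal sketch of the kind of resume mechanism I have in mind, assuming it is acceptable to record progress in a small text file (the name last_profile.txt and the helpers load_checkpoint/save_checkpoint are placeholders I made up):

import os

CHECKPOINT_FILE = "last_profile.txt"  # placeholder filename

def load_checkpoint(default=1):
    # Return the profile ID to resume from; fall back to `default`
    # on the first run or if the checkpoint file is unreadable.
    try:
        with open(CHECKPOINT_FILE, "r", encoding="utf-8") as f:
            return int(f.read().strip()) + 1
    except (FileNotFoundError, ValueError):
        return default

def save_checkpoint(profile):
    # Record the last fully processed profile ID. Writing to a temp
    # file and then renaming keeps the checkpoint from being left
    # half-written if the machine dies mid-write (os.replace is atomic).
    tmp = CHECKPOINT_FILE + ".tmp"
    with open(tmp, "w", encoding="utf-8") as f:
        f.write(str(profile))
    os.replace(tmp, CHECKPOINT_FILE)

# In the main loop, start from the checkpoint and record progress
# only after a profile has been fully processed:
profile = load_checkpoint()
while profile <= max_profiles:
    ...  # existing scraping logic for this profile
    save_checkpoint(profile)
    profile += 1

Since the checkpoint is written only after a profile completes, the worst case after a crash is re-scraping a single profile. Would this be a reasonable approach, or is there a more standard pattern for making a long-running scraper resumable?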