Try this.
from simplified_scrapy import SimplifiedDoc, utils
# Read test.html through the library helper and parse the whole document.
html = utils.getFileContent(r'test.html')
doc = SimplifiedDoc(html)
# Select every <details> element and print the `tag` attribute of each one.
details = doc.selects('details')
for detail in details:
    print(detail.tag)
If you still have problems, try the following, which reads the file line by line and parses each `details` element separately.
import io
from simplified_scrapy import SimplifiedDoc, utils
def getDetails(fileName):
    """Collect every ``<details>`` element found in an HTML file.

    The file is read line by line and each element is handed to its own
    ``SimplifiedDoc`` instance, so the document is never parsed as a whole.
    The start and end tags may sit on the same line or on different lines,
    and a single line may contain several elements, e.g.::

        <details>
        some words
        </details>
        <details>one</details><details>two</details>

    Lines that contain only whitespace are skipped. Nested ``<details>``
    elements are not supported: an element is taken to end at the first
    closing tag that follows its start tag. An element that is still open
    when the file ends is discarded.

    Args:
        fileName: Path of the HTML file; it is read as UTF-8.

    Returns:
        A list holding the result of ``doc.select('details')`` for each
        element, in the order the elements appear in the file.
    """
    tag = 'details'
    openMarker = '<' + tag
    closeMarker = '</' + tag + '>'
    details = []
    with io.open(fileName, "r", encoding='utf-8') as file:
        # Pieces of the element being collected; None while outside one.
        stanza = None
        for line in file:
            if line.strip() == '':
                continue
            # One line can hold several start/end tags, so consume it
            # piece by piece instead of classifying the line as a whole.
            rest = line
            while rest:
                if stanza is None:
                    start = rest.find(openMarker)
                    if start < 0:
                        break  # Nothing of interest on the rest of the line
                    stanza = []
                    rest = rest[start:]
                end = rest.find(closeMarker)
                if end < 0:
                    # Element continues on the following line(s).
                    stanza.append(rest)
                    break
                end += len(closeMarker)
                # Keep the text that precedes the closing tag on this line.
                stanza.append(rest[:end])
                doc = SimplifiedDoc(''.join(stanza))  # Instantiate a doc
                details.append(doc.select(tag))
                stanza = None
                rest = rest[end:]
    return details
# Parse the file element by element, then print the `tag` attribute of each
# result.
details = getDetails('test.html')
for detail in details:
    print(detail.tag)