I am scraping some HTML content..
for i, c in enumerate(cards[75:77]):
print(i)
a = c.find_element_by_class_name("influencer-stagename")
print(a.get_attribute('innerHTML'))
Works fine for all records except the 76th one. Output before error...
0
b'<a class="influencer-analytics-link" href="/influencers/sophiewilling"><h5><span>SOPHIE WILLING</span></h5></a>'
1
b'<a class="influencer-analytics-link" href="/influencers/ferntaylorr"><h5><span>Fern Taylor.</span></h5></a>'
2
b'<a class="influencer-analytics-link" href="/influencers/officialshaniceslatter"><h5><span>Shanice Slatter</span></h5></a>'
3
Stacktrace...
> -------------------------------------------------------------------------
WebDriverException Traceback (most recent call last) <ipython-input-484-0a80d1af1568> in <module>
3 #print(c.find_element_by_class_name("influencer-stagename").text)
4 a = c.find_element_by_class_name("influencer-stagename")
----> 5 print(a.get_attribute('innerHTML').encode('ascii', 'ignore'))
~/anaconda3/envs/py3-env/lib/python3.7/site-packages/selenium/webdriver/remote/webelement.py in get_attribute(self, name)
141 self, name)
142 else:
--> 143 resp = self._execute(Command.GET_ELEMENT_ATTRIBUTE, {'name': name})
144 attributeValue = resp.get('value')
145 if attributeValue is not None:
~/anaconda3/envs/py3-env/lib/python3.7/site-packages/selenium/webdriver/remote/webelement.py in _execute(self, command, params)
631 params = {}
632 params['id'] = self._id
--> 633 return self._parent.execute(command, params)
634
635 def find_element(self, by=By.ID, value=None):
~/anaconda3/envs/py3-env/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py in execute(self, driver_command, params)
319 response = self.command_executor.execute(driver_command, params)
320 if response:
--> 321 self.error_handler.check_response(response)
322 response['value'] = self._unwrap_value(
323 response.get('value', None))
~/anaconda3/envs/py3-env/lib/python3.7/site-packages/selenium/webdriver/remote/errorhandler.py in check_response(self, response)
240 alert_text = value['alert'].get('text')
241 raise exception_class(message, screen, stacktrace, alert_text)
--> 242 raise exception_class(message, screen, stacktrace)
243
244 def _value_or_default(self, obj, key, default):
WebDriverException: Message: unknown error: bad inspector message: {"id":110297,"result":{"result":{"type":"object","value":{"status":0,"value":"<a class=\"influencer-analytics-link\" href=\"/influencers/bookishemily\"><h5><span>Emily | 18 | GB | Student\uD83C...</span></h5></a>"}}}} (Session info: chrome=75.0.3770.100) (Driver info: chromedriver=2.40.565386 (45a059dc425e08165f9a10324bd1380cc13ca363),platform=Mac OS X 10.14.0 x86_64)
I suspect it is an invalid character in
value":"Emily | 18 | GB | Student\uD83C..."
Specifically I suspect "\uD83C"
Adding
.encode("utf-8") OR .encode('ascii', 'ignore')
to the second print statement changes nothing.
Any thoughts on how to solve this??
UPDATE: The problem is with Emoji characters. I have found 3 examples to far and each has an emoji (pink flower , russian flag and swirling leaves ). If I edit them out with Chrome inspector my code runs fine but this is not a solution that works at scale