I want to scrape the first image post and blacklist the url for the next search, that he skip the already used url and search for the next image post. I tried this to find the first image, but it dont works.
driver = webdriver.Chrome()
driver.get('https://9gag.com/funny')
time.sleep(2)
driver.find_element(By.XPATH, value='//*[@id="qc-cmp2-ui"]/div[2]/div/button[1]/span').click()
time.sleep(2)
gagpost = driver.find_element(By.CSS_SELECTOR,value=".image-post img")
gagpostsurl = gagpost.get_attribute('src')
gagposttitle = gagpost.get_attribute('alt')
print(gagpostsurl)
print(gagposttitle)
error: Traceback (most recent call last): File "C:\Users\klaus\PycharmProjects\testTEST\main.py", line 37, in gagposttitle = gagpost.find_element(By,value='img').get_attribute('alt') File "C:\Users\klaus\AppData\Local\Programs\Python\Python310\lib\site-packages\selenium\webdriver\remote\webelement.py", line 763, in find_element return self._execute(Command.FIND_CHILD_ELEMENT, File "C:\Users\klaus\AppData\Local\Programs\Python\Python310\lib\site-packages\selenium\webdriver\remote\webelement.py", line 740, in _execute return self.parent.execute(command, params) File "C:\Users\klaus\AppData\Local\Programs\Python\Python310\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 428, in execute response = self.command_executor.execute(driver_command, params) File "C:\Users\klaus\AppData\Local\Programs\Python\Python310\lib\site-packages\selenium\webdriver\remote\remote_connection.py", line 345, in execute data = utils.dump_json(params) File "C:\Users\klaus\AppData\Local\Programs\Python\Python310\lib\site-packages\selenium\webdriver\remote\utils.py", line 23, in dump_json return json.dumps(json_struct) File "C:\Users\klaus\AppData\Local\Programs\Python\Python310\lib\json_init.py", line 231, in dumps return _default_encoder.encode(obj) File "C:\Users\klaus\AppData\Local\Programs\Python\Python310\lib\json\encoder.py", line 199, in encode chunks = self.iterencode(o, _one_shot=True) File "C:\Users\klaus\AppData\Local\Programs\Python\Python310\lib\json\encoder.py", line 257, in iterencode return _iterencode(o, 0) File "C:\Users\klaus\AppData\Local\Programs\Python\Python310\lib\json\encoder.py", line 179, in default raise TypeError(f'Object of type {o.class.name} ' TypeError: Object of type type is not JSON serializable
Process finished with exit code 1
I also tried this and sometimes it worked, sometimes not.
driver = webdriver.Chrome()
driver.get('https://9gag.com/funny')
time.sleep(2)
driver.find_element(By.XPATH, value='//*[@id="qc-cmp2-ui"]/div[2]/div/button[1]/span').click()
time.sleep(2)
gagpost = driver.find_element(By.CSS_SELECTOR,value=".image-post img")
gagpostsurl = gagpost.get_attribute('src')
gagposttitle = gagpost.get_attribute('alt')
print(gagpostsurl)
print(gagposttitle)
I would appreciate any help.