1

I am trying to render a website to get the coordinates of human-visible objects from an HTML page. I can get the coordinates of a specific object with Selenium, but I need the coordinates of all objects. The main problem is filtering the divs: telling apart the ones that contain visible information from the ones that are transparent (see the attached picture). How can I do it? enter image description here

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from time import sleep, strftime
# Print the on-screen coordinates of the "Newest" link on the questions page.
driver = webdriver.Chrome()
try:
    # With the window maximized, the screen's top-left corner coincides with
    # the browser window's top-left corner.
    driver.maximize_window()
    driver.get("http://stackoverflow.com/questions")

    # "link text" is the value of By.LINK_TEXT. Passing the locator strategy
    # to find_element() works on Selenium 3 and 4, whereas the
    # find_element_by_* helpers were removed in Selenium 4.3.
    question = driver.find_element("link text", "Newest")
    location = question.location
    y_relative_coord = location['y']

    # Height of the browser UI (tabs, address bar, ...) above the page.
    browser_navigation_panel_height = driver.execute_script(
        'return window.outerHeight - window.innerHeight;')

    # Page coordinates -> screen coordinates: shift down by the browser UI.
    y_absolute_coord = y_relative_coord + browser_navigation_panel_height
    x_absolute_coord = location['x']
    print(x_absolute_coord, y_absolute_coord)
finally:
    # Always shut the browser down so no Chrome/chromedriver process leaks.
    driver.quit()

I made some additions, but for some reason the code does not work correctly:

import cv2
import numpy as np
from selenium import webdriver

# Take a screenshot of the page and draw a red bounding box around every
# <div> that is not hidden through an inline "display:none" style.
driver = webdriver.Chrome()
try:
    driver.set_window_size(1920, 1080)
    driver.get("http://stackoverflow.com/questions")
    driver.save_screenshot("screenshot.png")

    # "xpath" is the value of By.XPATH; find_elements(by, value) works on
    # Selenium 3 and 4 (the find_elements_by_* helpers were removed in 4.3).
    list_of_visible_elements = driver.find_elements(
        "xpath", "//div[not(contains(@style,'display:none'))]")

    contours = []
    for element in list_of_visible_elements:
        location = element.location
        size = element.size
        # The screenshot contains only the rendered page, not the browser's
        # tabs/address bar, so the page coordinates are used as-is. Adding
        # (window.outerHeight - window.innerHeight) here would shift every
        # box downwards. The page is not scrolled, so page coordinates and
        # screenshot coordinates coincide.
        contours.append(
            [location['x'], location['y'], size['width'], size['height']])
finally:
    # Always shut the browser down so no Chrome/chromedriver process leaks.
    driver.quit()

img = cv2.imread('screenshot.png')
result = img.copy()
for cntr in contours:
    # cv2.rectangle needs integer pixel coordinates; sizes may be fractional.
    x, y, w, h = (int(round(v)) for v in cntr)
    cv2.rectangle(result, (x, y), (x + w, y + h), (0, 0, 255), 2)
cv2.imshow("bounding_box", result)
cv2.waitKey(0)
cv2.destroyAllWindows()

UPD: I've improved the code a bit and it seems to work, but it is still not very accurate. How could the results be improved?

import cv2
import os
import numpy as np
from selenium import webdriver
from selenium.webdriver.chrome.options import Options


# Headless variant: screenshot the page, then draw a thin red bounding box
# around every <div> not hidden through an inline "display:none" style.
options = Options()
options.add_argument("--headless")
options.add_argument("window-size=1920,1080")

# `options=` replaces the deprecated `chrome_options=` keyword.
driver = webdriver.Chrome(options=options)
try:
    driver.set_window_size(1920, 1080)
    driver.get("https://stackoverflow.com/questions")
    driver.save_screenshot("s.png")

    # "xpath" is the value of By.XPATH; find_elements(by, value) works on
    # Selenium 3 and 4 (the find_elements_by_* helpers were removed in 4.3).
    list_of_visible_elements = driver.find_elements(
        "xpath", "//div[not(contains(@style,'display:none'))]")

    contours = []
    for element in list_of_visible_elements:
        location = element.location
        size = element.size
        # The screenshot contains only the rendered page, so page coordinates
        # are used directly; adding the browser-UI height
        # (window.outerHeight - window.innerHeight) would shift every box
        # downwards whenever that height is non-zero.
        x, y = location['x'], location['y']
        w, h = size['width'], size['height']
        # Heuristic kept from the original: skip zero-sized boxes and boxes
        # anchored on the top or left edge of the page.
        if x != 0 and y != 0 and w != 0 and h != 0:
            contours.append([x, y, w, h])
finally:
    # Always shut the browser down so no Chrome/chromedriver process leaks.
    driver.quit()

img = cv2.imread('s.png')
result = img.copy()
for cntr in contours:
    # cv2.rectangle needs integer pixel coordinates; sizes may be fractional.
    x, y, w, h = (int(round(v)) for v in cntr)
    cv2.rectangle(result, (x, y), (x + w, y + h), (0, 0, 255), 1)
cv2.imshow("bounding_box", result)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Best-effort cleanup of the temporary screenshot: ignore only file-system
# errors instead of swallowing every exception.
try:
    os.remove("s.png")
except OSError:
    pass

enter image description here

Nikita Kit
  • 117
  • 1
  • 10

2 Answers2

0

Yes, you need to use //div[not(contains(@style,'display:none'))] and use find_elements instead of find_element to get all visible web elements in a list.

This XPath selects every div whose inline style attribute does not contain 'display:none', i.e. all the divs that are not explicitly hidden through their inline style.

Code :

# Print the on-screen position of every <div> that is not hidden through an
# inline "display:none" style.
list_of_visible_elements = driver.find_elements(By.XPATH, "//div[not(contains(@style,'display:none'))]")

# Height of the browser UI (tabs, address bar, ...) above the page. It does
# not change between elements, so it is queried once instead of per element.
browser_navigation_panel_height = driver.execute_script('return window.outerHeight - window.innerHeight;')

for element in list_of_visible_elements:
    location = element.location
    y_relative_coord = location['y']
    # Page coordinates -> screen coordinates: shift down by the browser UI.
    y_absolute_coord = y_relative_coord + browser_navigation_panel_height
    x_absolute_coord = location['x']
    # Print the absolute y that was just computed (the original printed the
    # page-relative value and left y_absolute_coord unused).
    print(x_absolute_coord, y_absolute_coord)
cruisepandey
  • 28,520
  • 6
  • 20
  • 38
0

You can use driver.execute_script to run a recursive generator function in JavaScript that will traverse all the visible DOM nodes and return only those coordinates that are within the browser window height:

from selenium import webdriver
# Collect bounding boxes of DOM elements by running a recursive JavaScript
# generator inside the page and print the resulting list.
# NOTE(review): the chromedriver path is passed positionally; newer Selenium
# releases expect a Service object instead -- confirm the targeted version.
d = webdriver.Chrome('/path/to/chromedriver')
d.get('https://stackoverflow.com/questions/tagged/python')
# The script below defines get_nodes(root), which:
#   * skips root (and its whole subtree) when its computed `display` is 'none';
#   * yields [x, y, width, height] from root.getBoundingClientRect() when
#     root.offsetTop <= window.outerHeight - window.innerHeight;
#   * recurses into every child whose nodeType is 1 (element nodes).
# The generator is spread into an array starting at document.body and that
# array is returned to Python as a list of 4-item lists.
# NOTE(review): the offsetTop test compares against the height of the browser
# UI (outerHeight - innerHeight), not against the viewport height, and
# offsetTop is measured relative to the element's offsetParent -- confirm this
# is the intended "within the browser window" filter.
r = d.execute_script("""
function* get_nodes(root){
   var style = window.getComputedStyle(root)
   if (style.getPropertyValue('display') != 'none'){
       if (root.offsetTop <= (window.outerHeight - window.innerHeight)){
          var b_d = root.getBoundingClientRect()
          yield [b_d.x, b_d.y, b_d.width, b_d.height]
       }
       for (var i of root.childNodes){
          if (i.nodeType === 1){
             yield* get_nodes(i)
          }
       }
   }
}
return [...get_nodes(document.body)]
""")
print(r)

Output:

[[0, 0, 1200, 3519.9375], [0, 0, 1200, 0], [0, 0, 1200, 50], [0, 3, 1200, 47], [0, 3, 166, 47], [0, 3, 166, 47], [8, 9.5, 150, 30], [166, 10, 261.59375, 33], [168, 12, 68.359375, 29], [168, 12, 68.359375, 29], [240.359375, 12, 86.484375, 29], [240.359375, 12, 86.484375, 29], [330.84375, 12, 94.75, 29], [330.84375, 12, 94.75, 29], [427.59375, 9.703125, 628.9375, 33.59375], [439.59375, 9.703125, 604.9375, 33.59375], [439.59375, 9.703125, 604.9375, 33.59375], [1056.53125, 3, 143.46875, 47], [1056.53125, 10, 143.46875, 33], [1060.53125, 10, 59.453125, 33], [1123.578125, 10, 68.421875, 33], [1200, 26.5, 0, 0], [0, 50, 1200, 3147.9375], [0, 50, 164, 3147.9375], [0, 50, 164, 605], [0, 74, 154, 573.171875], [0, 74, 154, 573.171875], [0, 74, 154, 34], [0, 74, 154, 34], [8, 78, 142, 26], [8, 78, 35.765625, 26], [0, 124, 154, 221], [0, 124, 154, 221], [8, 124, 146, 14], [0, 142, 154, 33], [0, 142, 154, 33], [30, 150, 65.515625, 17], [8, 361, 146, 14], [0, 383, 154, 264.171875], [13, 396, 120.6875, 33], [13, 472, 129, 105.796875], [164, 50, 1036, 3147.9375], [189, 74, 663, 2952.1875], [189, 74, 663, 59], [189, 74, 547.984375, 35], [189, 74, 311.1875, 35], [748.984375, 74, 103.015625, 59], [748.984375, 74, 103.015625, 37.78125], [189, 133, 663, 121], [189, 133, 663, 85], [407.65625, 282.59375, 42.265625, 13], [470.109375, 282.59375, 35.015625, 13], [525.3125, 282.59375, 50.53125, 13], [579.84375, 282.59375, 24.109375, 13], [624.140625, 282.59375, 70.125, 13], [189, 318.1875, 663, 0], [189, 318.1875, 663, 243.1875], [190, 319.1875, 661, 182], [186, 319.1875, 669, 182], [202, 335.1875, 161.40625, 150], [202, 331.1875, 161.40625, 104], [202, 331.1875, 57.8125, 19], [202, 358.1875, 161.40625, 19], [200, 358.1875, 165.40625, 19], [202, 361.1875, 13, 13], [202, 361.1875, 13, 13], [219, 358.1875, 83.859375, 19], [202, 385.1875, 161.40625, 19], [200, 385.1875, 165.40625, 19], [202, 388.1875, 13, 13], [202, 388.1875, 13, 13], [219, 385.1875, 144.40625, 19], [202, 412.1875, 161.40625, 
19], [200, 412.1875, 165.40625, 19], [202, 415.1875, 13, 13], [202, 415.1875, 13, 13], [219, 412.1875, 82.34375, 19], [395.40625, 335.1875, 157.53125, 150], [395.40625, 331.1875, 157.53125, 158], [395.40625, 331.1875, 69.796875, 19], [395.40625, 358.1875, 157.53125, 19], [393.40625, 358.1875, 161.53125, 19], [395.40625, 361.1875, 13, 13], [395.40625, 361.1875, 13, 13], [412.40625, 358.1875, 55.421875, 19], [395.40625, 385.1875, 157.53125, 19], [393.40625, 385.1875, 161.53125, 19], [395.40625, 388.1875, 13, 13], [395.40625, 388.1875, 13, 13], [412.40625, 385.1875, 105.40625, 19], [395.40625, 412.1875, 157.53125, 19], [393.40625, 412.1875, 161.53125, 19], [395.40625, 415.1875, 13, 13], [395.40625, 415.1875, 13, 13], [412.40625, 412.1875, 79.5625, 19], [395.40625, 439.1875, 157.53125, 19], [393.40625, 439.1875, 161.53125, 19], [412.40625, 439.1875, 100.5625, 19], [584.9375, 335.1875, 234.65625, 150], [584.9375, 331.1875, 234.65625, 77], [584.9375, 331.1875, 87.3125, 19], [584.9375, 358.1875, 234.65625, 19], [582.9375, 358.1875, 238.65625, 19], [584.9375, 361.1875, 13, 13], [584.9375, 361.1875, 13, 13], [601.9375, 358.1875, 121.03125, 19], [584.9375, 385.1875, 234.65625, 19], [582.9375, 385.1875, 238.65625, 19], [584.9375, 388.1875, 13, 13], [584.9375, 388.1875, 13, 13], [601.9375, 385.1875, 133.46875, 19], [608.9375, 412.1875, 210.65625, 37], [608.9375, 412.1875, 210.65625, 37], [611.9375, 421.6875, 73.171875, 16], [613.9375, 419.28125, 69.171875, 24], [662.109375, 423.28125, 16, 16], [685.109375, 415.1875, 23, 29], [708.109375, 421.6875, 0, 16], [189, 2942.1875, 0, 16], [876, 74, 300, 3084.9375], [876, 74, 300, 371], [877, 74, 298, 358], [877, 74, 298, 41], [877, 127, 298, 34], [893, 127, 22.15625, 34], [915.15625, 127, 243.84375, 34], [915.15625, 127, 208.8125, 33], [877, 173, 298, 51], [893, 173, 22.15625, 51], [915.15625, 173, 243.84375, 51], [915.15625, 173, 236.59375, 50], [877, 755, 298, 42], [889, 762, 274, 25], [889, 762, 67.984375, 25], [1163, 762, 0, 25], 
[877, 797, 298, 966], [877, 797, 298, 920], [877, 797, 298, 115], [877, 797, 298, 115], [889, 806, 40, 100], [889, 806, 32, 32], [929, 803, 234, 103], [929, 806, 234, 32], [929, 841, 234, 16], [929, 841, 33.234375, 15], [969.234375, 841, 86.09375, 16], [983.234375, 841, 72.09375, 16], [929, 860, 234, 16], [929, 860, 81.953125, 16], [929, 861, 81.953125, 13], [1017.953125, 860, 63.390625, 16], [1017.953125, 861, 63.390625, 13], [929, 879, 31.84375, 24], [962.84375, 879, 48.296875, 24], [929, 906, 234, 0], [1153, 803, 16, 16], [1071.640625, 776, 97.359375, 22], [877, 912, 298, 99], [889, 921, 40, 84], [889, 921, 32, 32], [929, 918, 234, 87], [929, 921, 234, 16], [929, 940, 234, 16], [929, 940, 57.96875, 15], [993.96875, 940, 114.015625, 16], [1007.96875, 940, 100.015625, 16], [929, 959, 234, 16], [929, 959, 78.109375, 16], [929, 960, 78.109375, 13], [1014.109375, 959, 63.390625, 16], [1014.109375, 960, 63.390625, 13], [929, 978, 48.296875, 24], [979.296875, 978, 51.125, 24], [929, 1005, 234, 0], [1153, 918, 16, 16], [1071.640625, 891, 97.359375, 22], [877, 1011, 298, 115], [889, 1020, 40, 100], [889, 1020, 32, 32], [929, 1017, 234, 103], [929, 1020, 234, 32], [929, 1055, 234, 16], [929, 1055, 82.40625, 15], [1018.40625, 1055, 114.015625, 16], [1032.40625, 1055, 100.015625, 16], [929, 1074, 234, 16], [929, 1074, 63.390625, 16], [929, 1075, 63.390625, 13], [929, 1093, 49.171875, 24], [980.171875, 1093, 32.234375, 24], [929, 1120, 234, 0], [1153, 1017, 16, 16], [1071.640625, 990, 97.359375, 22], [877, 1126, 298, 99], [889, 1135, 40, 84], [889, 1135, 32, 32], [929, 1132, 234, 87], [929, 1135, 234, 16], [929, 1154, 234, 16], [929, 1154, 58.796875, 15], [994.796875, 1154, 114.015625, 16], [1008.796875, 1154, 100.015625, 16], [929, 1173, 234, 16], [929, 1173, 81.984375, 16], [929, 1174, 81.984375, 13], [1017.984375, 1173, 63.390625, 16], [1017.984375, 1174, 63.390625, 13], [929, 1192, 51.125, 24], [982.125, 1192, 64.421875, 24], [929, 1219, 234, 0], [1153, 1132, 16, 16], 
[1071.640625, 1105, 97.359375, 22], [877, 1225, 298, 131], [889, 1234, 40, 116], [889, 1234, 32, 32], [929, 1231, 234, 119], [929, 1234, 234, 48], [929, 1285, 234, 16], [929, 1285, 82.921875, 15], [1018.921875, 1285, 114.015625, 16], [1032.921875, 1285, 100.015625, 16], [929, 1304, 234, 16], [929, 1304, 78.5625, 16], [929, 1305, 78.5625, 13], [1014.5625, 1304, 63.390625, 16], [1014.5625, 1305, 63.390625, 13], [929, 1323, 51.125, 24], [982.125, 1323, 67.234375, 24], [1153, 1231, 16, 16], [1071.640625, 1204, 97.359375, 22], [877, 1356, 298, 115], [889, 1365, 40, 100], [889, 1365, 32, 32], [929, 1362, 234, 103], [929, 1365, 234, 32], [929, 1400, 234, 16], [929, 1400, 52.46875, 15], [988.46875, 1400, 114.015625, 16], [1002.46875, 1400, 100.015625, 16], [929, 1419, 234, 16], [929, 1419, 77.3125, 16], [929, 1420, 77.3125, 13], [1013.3125, 1419, 63.390625, 16], [1013.3125, 1420, 63.390625, 13], [929, 1438, 48.984375, 24], [979.984375, 1438, 67.234375, 24], [929, 1465, 234, 0], [1153, 1362, 16, 16], [1071.640625, 1335, 97.359375, 22], [877, 1471, 298, 131], [889, 1480, 40, 116], [889, 1480, 32, 32], [929, 1477, 234, 119], [929, 1480, 234, 32], [929, 1515, 234, 32], [929, 1515, 150.71875, 15], [929, 1531, 95.8125, 16], [943, 1531, 81.8125, 16], [929, 1550, 234, 16], [929, 1550, 63.390625, 16], [929, 1551, 63.390625, 13], [999.390625, 1550, 88.953125, 16], [999.390625, 1551, 88.953125, 13], [929, 1569, 48.296875, 24], [979.296875, 1569, 64.421875, 24], [1153, 1477, 16, 16], [1071.640625, 1450, 97.359375, 22], [877, 1602, 298, 109], [889, 1611, 32, 32], [929, 1608, 234, 103], [929, 1611, 234, 16], [929, 1630, 234, 32], [929, 1630, 217.78125, 15], [929, 1646, 79.0625, 16], [943, 1646, 65.0625, 16], [929, 1665, 234, 16], [929, 1665, 63.390625, 16], [929, 1666, 63.390625, 13], [929, 1684, 49.171875, 24], [980.171875, 1684, 48.296875, 24], [929, 1711, 234, 0], [1153, 1608, 16, 16], [1071.640625, 1581, 97.359375, 22], [0, 0, 1200, 655], [487.9937438964844, 341.8999938964844, 
223.99685668945312, 18.600006103515625], [487.9937438964844, 368.8999938964844, 223.99685668945312, 55.556243896484375], [487.9937438964844, 370.1000061035156, 223.99685668945312, 30.600006103515625], [487.9937438964844, 370.1000061035156, 223.99685668945312, 30.600006103515625], [489.1937561035156, 382.70001220703125, 221.59689331054688, 18], [487.9937438964844, 403.1000061035156, 223.99685668945312, 20.15625], [697.5906372070312, 332.29998779296875, 24, 24.600006103515625], [665.859375, 3229.9375, 198, 171], [665.859375, 3229.9375, 198, 34], [665.859375, 3229.9375, 121.0625, 33], [786.921875, 3229.9375, 0, 16], [665.859375, 3275.9375, 198, 125], [665.859375, 3275.9375, 198, 25], [665.859375, 3275.9375, 87.84375, 25], [665.859375, 3300.9375, 198, 25], [665.859375, 3300.9375, 76.75, 25], [665.859375, 3325.9375, 198, 25], [665.859375, 3325.9375, 138.375, 25], [665.859375, 3350.9375, 198, 25], [665.859375, 3350.9375, 65.9375, 25], [0, 0, 0, 0], [0, 50, 3.59375, 17]]
Ajax1234
  • 69,937
  • 8
  • 61
  • 102