The JS code below (run via Python Selenium) searches for text in the document, locates the matching tag, and pulls out the CSS attributes of that HTML element.
# Look up every string of `corpus` in the current page and collect the computed
# font styles of each element whose (whitespace-normalised) text matches.
#
# The script receives `corpus` as a JSON-encoded array (arguments[0]) and
# returns a JSON string of the form {"stylesheet": [...], "not_found": [...]}.
#
# NOTE: the JavaScript below deliberately contains no backslashes. It lives in
# a non-raw Python string, so an escape such as \' would be consumed by Python
# before the browser ever sees it. Quote characters are therefore always
# written by wrapping them in the other kind of quote.
# NOTE(review): the `encoding` keyword of json.dumps exists only on Python 2;
# drop it when running on Python 3.
styleJSON = driver.execute_script("""
// Run an XPath query against the whole document and return a node iterator.
function getElementByXpath(path) {
    return document.evaluate(path, document, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
}

// Turn arbitrary text into an XPath 1.0 string expression.
// XPath 1.0 has no escape character, so text that contains quotes is split
// into pieces and glued back together with concat(): single quotes are
// wrapped in double quotes, everything else is wrapped in single quotes.
function cleanStringForXpath(str) {
    var parts = String(str).match(/[^'"]+|['"]/g);
    if (!parts) {
        return "''";
    }
    var quoted = parts.map(function (part) {
        return part === "'" ? '"' + part + '"' : "'" + part + "'";
    });
    // concat() needs at least two arguments, so always append an empty string.
    return 'concat(' + quoted.join(',') + ",'')";
}

// Build an XPath that points at the given element.
function getPathTo(element) {
    // Document node reached (element lives outside <body>): stop recursing.
    if (element.nodeType === 9) {
        return '';
    }
    if (element.id) {
        return 'id("' + element.id + '")';
    }
    if (element === document.body) {
        return '//' + element.tagName;
    }
    var ix = 0;
    var siblings = element.parentNode.childNodes;
    for (var i = 0; i < siblings.length; i++) {
        var sibling = siblings[i];
        if (sibling === element) {
            return getPathTo(element.parentNode) + '/' + element.tagName + '[' + (ix + 1) + ']';
        }
        // Count only preceding element siblings with the same tag name.
        if (sibling.nodeType === 1 && sibling.tagName === element.tagName) {
            ix++;
        }
    }
}

var not_found = [];
var styleSheet = [];
try {
    var corpus = JSON.parse(arguments[0]);
    console.log(corpus);
    for (var k = 0; k < corpus.length; k++) {
        var pos = 0;
        var elements = getElementByXpath(
            '//*[normalize-space(text())=normalize-space(' + cleanStringForXpath(corpus[k]) + ')]'
        );
        var elem = elements.iterateNext();
        if (!elem) {
            not_found.push(corpus[k]);
        }
        while (elem) {
            // pos counts every match, including the skipped SCRIPT/TITLE ones.
            pos++;
            // Skip non-visual elements; both conditions must hold (&&, not ||).
            if (elem.tagName !== 'SCRIPT' && elem.tagName !== 'TITLE') {
                var style = window.getComputedStyle(elem);
                var temp = {
                    'corpus': corpus[k],
                    'font_size': style.fontSize,
                    'font_color': style.color,
                    'font_weight': style.fontWeight,
                    'font_family': style.fontFamily,
                    'tag': elem.tagName,
                    'xpath': getPathTo(elem),
                    'pos': pos,
                    'text_decoration': style.textDecoration
                };
                styleSheet.push(temp);
                console.log(temp);
            }
            elem = elements.iterateNext();
        }
    }
} catch (err) {
    // Best effort: log the failure and return whatever was collected so far.
    console.log(err);
}
console.log(not_found);
return JSON.stringify({stylesheet: styleSheet, not_found: not_found});
""", json.dumps(corpus, ensure_ascii=False, encoding='utf-8'))
My problem arises when text containing single and double quotes is passed in. I tried using the function cleanStringForXpath from link to sanitize the text passed to the XPath query '//*[normalize-space(text())=normalize-space('+cleanStringForXpath(corpus[k])+')]'
, but I can't get it working. I tried adding it to execute_script
by simple concatenation ("""...""" + """...""")
in Python, tried passing the function as an argument to execute_script and then calling the Function
constructor, and tried using the %s
modifier in Python, but none of these yielded any results. What could possibly be the issue here?