-1

I am trying to scrape a table from a website:

After importing the url

print(soup.prettify())

<!DOCTYPE html>
<html lang="en">
 <head>
  <meta content="noindex" name="robots"/>
  <meta charset="utf-8"/>
  <meta content="width=device-width,initial-scale=1,shrink-to-fit=no" name="viewport"/>
  <link href="https://d9mzsvqupf0ma.cloudfront.net/0367505b9e/static/react/favicon.ico" rel="shortcut icon"/>
  <title>
   Reonomy
  </title>
  <script src="/static/react/env.js?1592498512097">
  </script>
  <script onerror='console.error("Error loading Google Maps. Please check your firewall, proxy, or ad blocker settings.")' src="//maps.googleapis.com/maps/api/js?v=3&amp;libraries=places,drawing,geometry&amp;client=gme-scryerinc">
  </script>
  <script type="text/javascript">
   !function(){if(void 0!==window.env&&"production"===window.env.REACT_APP_ENVIRONMENT){var i=window.analytics=window.analytics||[];if(!i.initialize)if(i.invoked)window.console&&console.error&&console.error("Segment snippet included twice.");else{i.invoked=!0,i.methods=["trackSubmit","trackClick","trackLink","trackForm","pageview","identify","reset","group","track","ready","alias","debug","page","once","off","on"],i.factory=function(t){return function(){var e=Array.prototype.slice.call(arguments);return e.unshift(t),i.push(e),i}};for(var e=0;e<i.methods.length;e++){var t=i.methods[e];i[t]=i.factory(t)}i.load=function(e,t){var n=document.createElement("script");n.type="text/javascript",n.async=!0,n.src="https://cdn.segment.com/analytics.js/v1/"+e+"/analytics.min.js";var o=document.getElementsByTagName("script")[0];o.parentNode.insertBefore(n,o),i._loadOptions=t},i.SNIPPET_VERSION="4.1.0",i.load("Jb0xYxcgY3BJTcGWoAmtUP9qwhM9V2pp")}}}()
  </script>
  <link href="https://d9mzsvqupf0ma.cloudfront.net/0367505b9e/static/react/static/css/main.4f4bf592.chunk.css" rel="stylesheet"/>
 </head>
 <body>
  <noscript>
   You need to enable JavaScript to run this app.
  </noscript>
  <div id="root">
  </div>
  <script>
   !function(d){function e(e){for(var t,r,n=e[0],c=e[1],o=e[2],a=0,f=[];a<n.length;a++)r=n[a],Object.prototype.hasOwnProperty.call(s,r)&&s[r]&&f.push(s[r][0]),s[r]=0;for(t in c)Object.prototype.hasOwnProperty.call(c,t)&&(d[t]=c[t]);for(h&&h(e);f.length;)f.shift()();return i.push.apply(i,o||[]),u()}function u(){for(var e,t=0;t<i.length;t++){for(var r=i[t],n=!0,c=1;c<r.length;c++){var o=r[c];0!==s[o]&&(n=!1)}n&&(i.splice(t--,1),e=p(p.s=r[0]))}return e}var r={},l={5:0},s={5:0},i=[];function p(e){if(r[e])return r[e].exports;var t=r[e]={i:e,l:!1,exports:{}};return d[e].call(t.exports,t,t.exports,p),t.l=!0,t.exports}p.e=function(i){var e=[];l[i]?e.push(l[i]):0!==l[i]&&{20:1,21:1,24:1,25:1}[i]&&e.push(l[i]=new Promise(function(e,n){for(var t="static/css/"+({}[i]||i)+"."+{0:"31d6cfe0",1:"31d6cfe0",2:"31d6cfe0",3:"31d6cfe0",7:"31d6cfe0",8:"31d6cfe0",9:"31d6cfe0",10:"31d6cfe0",11:"31d6cfe0",12:"31d6cfe0",13:"31d6cfe0",14:"31d6cfe0",15:"31d6cfe0",16:"31d6cfe0",17:"31d6cfe0",18:"31d6cfe0",19:"31d6cfe0",20:"7bbd82a1",21:"989321a7",22:"31d6cfe0",23:"31d6cfe0",24:"d608a43c",25:"36cb7054",26:"31d6cfe0",27:"31d6cfe0",28:"31d6cfe0",29:"31d6cfe0",30:"31d6cfe0",31:"31d6cfe0",32:"31d6cfe0"}[i]+".chunk.css",c=p.p+t,r=document.getElementsByTagName("link"),o=0;o<r.length;o++){var a=(d=r[o]).getAttribute("data-href")||d.getAttribute("href");if("stylesheet"===d.rel&&(a===t||a===c))return e()}var f=document.getElementsByTagName("style");for(o=0;o<f.length;o++){var d;if((a=(d=f[o]).getAttribute("data-href"))===t||a===c)return e()}var u=document.createElement("link");u.rel="stylesheet",u.type="text/css",u.onload=e,u.onerror=function(e){var t=e&&e.target&&e.target.src||c,r=new Error("Loading CSS chunk "+i+" failed.\n("+t+")");r.code="CSS_CHUNK_LOAD_FAILED",r.request=t,delete l[i],u.parentNode.removeChild(u),n(r)},u.href=c,document.getElementsByTagName("head")[0].appendChild(u)}).then(function(){l[i]=0}));var r=s[i];if(0!==r)if(r)e.push(r[2]);else{var t=new Promise(function(e,t){r=s[i]=[e,t]});e.push(r[2]=t);var n,c=document.createElement("script");c.charset="utf-8",c.timeout=120,p.nc&&c.setAttribute("nonce",p.nc),c.src=p.p+"static/js/"+({}[i]||i)+"."+{0:"ca0cfe7f",1:"1f775947",2:"f3aa526c",3:"8e92118a",7:"8821eefa",8:"e17401b1",9:"6e4ba317",10:"24f1a107",11:"96c5e7b8",12:"7a6ef661",13:"e539811a",14:"37c1ffc4",15:"dc8d4356",16:"2d61de04",17:"23eefbbb",18:"51a9cf50",19:"7f8a5cf4",20:"c409a0e9",21:"00e0dc95",22:"de275a36",23:"114fe889",24:"a1c29240",25:"b1426e77",26:"2eaf037b",27:"cf150351",28:"ac391d82",29:"b2c0bc67",30:"4b510904",31:"5a5b63b1",32:"f8a3d31f"}[i]+".chunk.js";var o=new Error;n=function(e){c.onerror=c.onload=null,clearTimeout(a);var t=s[i];if(0!==t){if(t){var r=e&&("load"===e.type?"missing":e.type),n=e&&e.target&&e.target.src;o.message="Loading chunk "+i+" failed.\n("+r+": "+n+")",o.name="ChunkLoadError",o.type=r,o.request=n,t[1](o)}s[i]=void 0}};var a=setTimeout(function(){n({type:"timeout",target:c})},12e4);c.onerror=c.onload=n,document.head.appendChild(c)}return Promise.all(e)},p.m=d,p.c=r,p.d=function(e,t,r){p.o(e,t)||Object.defineProperty(e,t,{enumerable:!0,get:r})},p.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},p.t=function(t,e){if(1&e&&(t=p(t)),8&e)return t;if(4&e&&"object"==typeof t&&t&&t.__esModule)return t;var r=Object.create(null);if(p.r(r),Object.defineProperty(r,"default",{enumerable:!0,value:t}),2&e&&"string"!=typeof t)for(var n in t)p.d(r,n,function(e){return t[e]}.bind(null,n));return r},p.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return p.d(t,"a",t),t},p.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},p.p="https://d9mzsvqupf0ma.cloudfront.net/0367505b9e/static/react/",p.oe=function(e){throw console.error(e),e};var t=this.webpackJsonpfrontend=this.webpackJsonpfrontend||[],n=t.push.bind(t);t.push=e,t=t.slice();for(var c=0;c<t.length;c++)e(t[c]);var h=n;u()}([])
  </script>
  <script src="https://d9mzsvqupf0ma.cloudfront.net/0367505b9e/static/react/static/js/6.41e506b7.chunk.js">
  </script>
  <script src="https://d9mzsvqupf0ma.cloudfront.net/0367505b9e/static/react/static/js/main.e68cecb8.chunk.js">
  </script>
 </body>
</html>

When I inspect the website, I see that my table is there between tags:

enter image description here

Still when I use :

print(soup.find_all('td'))

It returns me an empty list. Can someone point out what I did wrong ?

hmmmbob
  • 1,167
  • 5
  • 19
  • 33
  • BeautifulSoup will not execute the JavaScript to generate those `` nodes that the browser produced for (via execution of JavaScript) that then appear in its element inspector. Please refer to the list of related threads that now appear on the right hand side bar. – metatoaster Jun 22 '20 at 03:07
  • Also as an aside, when you ran `print(soup.prettify())` it should be rather apparent that there are no `` tags anywhere in that output - that should explain why `find_all` will not be able to return any of them. – metatoaster Jun 22 '20 at 03:17

1 Answers1

0

Beautifulsoup, doesn't evaluate javascript.

It looks like all those tables are being generated by Javascript. You could use dryscape to evaluate the page before passing it on to beautiful soup.

Alterlife
  • 6,557
  • 7
  • 36
  • 49