I'm trying to scrape the Thingiverse website, specifically the page that displays a "thing", like this one for example. The problem is that when I make a GET request (using Python's urllib or requests package), the response is a nearly empty HTML document containing a lot of head metadata, some scripts, and an empty react-app div:
<!doctype html>
<html lang="en" xmlns="http://www.w3.org/1999/xhtml" xmlns:fb="http://ogp.me/ns/fb#">
<head>
  <!-- Encoding declaration goes first so it sits within the first 1024 bytes of the document. -->
  <meta charset="utf-8">
  <title>PCB Feet/Standoffs for M3 by scruss - Thingiverse</title>
  <meta http-equiv="Content-Language" content="EN">
  <meta http-equiv="imagetoolbar" content="no">
  <meta name="keywords"
        content="things, digital design, physical objects, rapid prototyping, 3D objects, 3D printing, reprap, fabrication, laser cutter, laser, thingaverse, thingyverse">
  <meta name="abstract" content="Share your digital designs for physical objects.">
  <meta name="author" content="Thingiverse.com">
  <meta name="distribution" content="Global">
  <meta name="revisit-after" content="1 days">
  <meta name="robots" content="follow,index">
  <meta name="description"
        content="Download files and build them with your 3D printer, laser cutter, or CNC. Thingiverse is a universe of things.">
  <!-- No maximum-scale here: capping the scale prevents users from zooming the page. -->
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <meta name="theme-color" content="#248bfb">

  <!-- Open Graph / Twitter card metadata for this thing. -->
  <meta property="og:type" content="website">
  <meta property="og:title" content="PCB Feet/Standoffs for M3 by scruss">
  <!-- Quotation marks inside the value are escaped; a raw double quote would end the attribute early. -->
  <meta property="og:description"
        content="Basic &quot;I don't want my protoboard shorting or gouging holes in my desk&quot; feet/standoffs for M3-drilled 1.6 mm thick PCBs.">
  <meta property="og:image" content="https://cdn.thingiverse.com/assets/d5/9e/0e/1c/f3/featured_preview_pcb_feet.png">
  <meta property="twitter:card" content="summary">
  <meta property="twitter:site" content="@thingiverse">
  <meta property="og:url" content="https://www.thingiverse.com/thing:4796603">
  <meta property="twitter:creator" content="@scruss">

  <!-- Touch icons and favicons, one entry per size. -->
  <link rel="apple-touch-icon" sizes="57x57"
        href="https://cdn.thingiverse.com/site/img/favicons/apple-touch-icon-57x57.png">
  <link rel="apple-touch-icon" sizes="114x114"
        href="https://cdn.thingiverse.com/site/img/favicons/apple-touch-icon-114x114.png">
  <link rel="apple-touch-icon" sizes="72x72"
        href="https://cdn.thingiverse.com/site/img/favicons/apple-touch-icon-72x72.png">
  <link rel="apple-touch-icon" sizes="144x144"
        href="https://cdn.thingiverse.com/site/img/favicons/apple-touch-icon-144x144.png">
  <link rel="apple-touch-icon" sizes="60x60"
        href="https://cdn.thingiverse.com/site/img/favicons/apple-touch-icon-60x60.png">
  <link rel="apple-touch-icon" sizes="120x120"
        href="https://cdn.thingiverse.com/site/img/favicons/apple-touch-icon-120x120.png">
  <link rel="apple-touch-icon" sizes="76x76"
        href="https://cdn.thingiverse.com/site/img/favicons/apple-touch-icon-76x76.png">
  <link rel="apple-touch-icon" sizes="152x152"
        href="https://cdn.thingiverse.com/site/img/favicons/apple-touch-icon-152x152.png">
  <link rel="icon" type="image/png" href="https://cdn.thingiverse.com/site/img/favicons/favicon-192x192.png"
        sizes="192x192">
  <link rel="icon" type="image/png" href="https://cdn.thingiverse.com/site/img/favicons/favicon-160x160.png"
        sizes="160x160">
  <link rel="icon" type="image/png" href="https://cdn.thingiverse.com/site/img/favicons/favicon-96x96.png"
        sizes="96x96">
  <link rel="icon" type="image/png" href="https://cdn.thingiverse.com/site/img/favicons/favicon-16x16.png"
        sizes="16x16">
  <link rel="icon" type="image/png" href="https://cdn.thingiverse.com/site/img/favicons/favicon-32x32.png"
        sizes="32x32">
  <meta name="msapplication-TileColor" content="#ffffff">
  <meta name="msapplication-TileImage" content="https://cdn.thingiverse.com/site/img/favicons/mstile-144x144.png">

  <!-- RSS feed of comments on this thing. -->
  <link rel="alternate" type="application/rss+xml" title="Thingiverse - PCB Feet/Standoffs for M3 Comments"
        href="https://rss.thingiverse.com/thing:4796603">

  <!-- Datadog browser log agent; the inline script below configures it. -->
  <script src="https://www.datadoghq-browser-agent.com/datadog-logs-us.js"></script>
  <script>
    // Logging settings, declared at the top level of the script exactly as before.
    const ddClientToken = "pub24a00142f6aa558abe1827e911e11e58";
    const ddEnv = "production";
    const ddVersion = "2.11.0";
    const ddIsTvNext = true;
    const ddBuildTime = "1617625667";

    // The agent script may fail to load (network error, content blocker).
    // Skip the setup in that case instead of throwing a ReferenceError.
    if (typeof DD_LOGS !== "undefined") {
      DD_LOGS.init({
        clientToken: ddClientToken,
        forwardErrorsToLogs: true,
        service: "thingiverse-client",
        env: ddEnv,
        version: ddVersion,
        sampleRate: 20
      });
      DD_LOGS.addLoggerGlobalContext("is_thingiverse_next", ddIsTvNext);
      DD_LOGS.addLoggerGlobalContext("build_time", ddBuildTime);
    }
  </script>
  <script>
    // Asset URL lists and build timestamp published as globals.
    // NOTE(review): presumably consumed by the application bundle; the consumer is not visible here.
    var scripts = [
      "https://cdn.thingiverse.com/site/js/thingiverse/build/lib-afbc32d766.js",
      "https://cdn.thingiverse.com/site/js/thingiverse/build/header-aa33d7b171.js",
      "https://cdn.thingiverse.com/site/js/thingiverse/build/footer-df22f3acb4.js",
      "https://cdn.thingiverse.com/site/js/thingiverse/build/things-d4ffa805ef.js",
      "https://cdn.thingiverse.com/site/js/thingiverse/build/orders-e1ac5a6395.js",
      "https://cdn.thingiverse.com/site/js/thingiverse/build/gallery-7fc215e644.js"
    ];
    var stylesheets = [];
    var build_time = 1617625667;
  </script>
</head>
<!-- Explicit body: a script placed straight after the closing head tag is a parse error and is moved back into head. -->
<body>
  <script src="https://cdn.thingiverse.com/site/js/three.min.bundle.js?1617625667"></script>
  <!-- Empty mount point. NOTE(review): presumably filled in at runtime by the app bundle loaded below. -->
  <div class="react-app" id="react-app"></div>
  <script src="https://cdn.thingiverse.com/site/js/app.bundle.js?1617625667"></script>
  <script>
    // Uptime.com real-user-monitoring loader: publishes its settings on window._uptime_rum,
    // then appends the RUM script to head as an async script element.
    (function (w, d) {
      w._uptime_rum = {};
      w._uptime_rum.uuid = 'AVO7-994EF0DD9662F23C';
      w._uptime_rum.url = 'https://rum.uptime.com/rum/record-data';
      var loader = d.createElement('script');
      loader.async = true;
      loader.src = 'https://uptime.com/static/rum/compiled/rum.js';
      d.getElementsByTagName('head')[0].appendChild(loader);
    })(window, document);
  </script>
</body>
</html>
Unfortunately, this isn't the HTML you see when inspecting the page in a browser. I'm guessing React inserts its HTML later on, client-side, which is why the div is empty. Is there any way of getting around that and receiving the actual HTML that you can see in the browser?