I am trying to create a client-side scraper. I would like to use only JavaScript or jQuery, running entirely in the browser, to fetch another website's HTML (returned as JSON) and display it on my webpage.
Here is what I tried:
<html>
<head>
<script src="http://code.jquery.com/jquery-1.9.1.min.js"></script>
<script type="text/javascript">
var ExternalURL = "www.example.com"; // This address must not contain any leading "http://"
var ContentLocationInDOM = "#someNode > .childNode"; // If you're trying to get sub-content from the page, specify the jQuery/CSS selector here, otherwise set this to "null"

$(document).ready(loadContent);

function loadContent()
{
    var QueryURL = "http://anyorigin.com/get?url=" + ExternalURL + "&callback=?";
    $.getJSON(QueryURL, function(data){
        if (data && typeof data == "object" && data.contents && typeof data.contents == "string")
        {
            // Strip any <script> blocks from the fetched markup before injecting it
            data = data.contents.replace(/<script[^>]*>[\s\S]*?<\/script>/gi, "");
            if (data.length > 0)
            {
                if (ContentLocationInDOM && ContentLocationInDOM != "null")
                {
                    $('#queryResultContainer').html($(ContentLocationInDOM, data));
                }
                else
                {
                    $('#queryResultContainer').html(data);
                }
            }
        }
    });
}
</script>
</head>
<body>
<div id="queryResultContainer"></div>
</body>
</html>
But I do not want to use any other website's API to accomplish this. As you can see, the anyorigin.com API above is what actually fetches the HTML of the other website.
What I am looking for is just a simple way to extract the HTML body content from a website and display it on my web page, with the request and response handled entirely on the client side. There should be no server-side script involved. Please help me with your suggestions.
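Essentially, what I imagine is something along these lines (a rough sketch only; the URL is a placeholder, and I understand this kind of direct request will only succeed if the target site sends CORS headers allowing it):
function loadBodyContent(url) {
    // Fetch the page directly from the browser, no proxy or server-side script.
    // Note: the browser blocks the response unless the target site allows
    // cross-origin requests (CORS).
    fetch(url)
        .then(function (response) { return response.text(); })
        .then(function (html) {
            // Parse the fetched markup into a detached document
            var doc = new DOMParser().parseFromString(html, "text/html");
            // Display the target page's body content on my page
            $('#queryResultContainer').html(doc.body.innerHTML);
        })
        .catch(function (err) {
            console.log("Request failed (likely blocked by CORS): " + err);
        });
}

loadBodyContent("https://www.example.com"); // placeholder URL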