I am trying to make this code work in the browser.
scrape.html
<!doctype html>
<html>
<head>
  <!-- Document head: an (effectively empty) title, spacing rules for the form, and two script includes. -->
  <title> </title>
  <style>
    /* Labels and the div wrappers get the same bottom spacing. */
    label,
    div {
      margin-bottom: 2%;
    }
  </style>
  <!-- NOTE(review): both scripts are pulled from an npm node_modules folder outside the project;
       the accompanying report shows "patchRequire is not defined" at casper.js:32 - confirm
       whether these modules are meant to be loaded by a browser at all. -->
  <script src="../../AppData/Roaming/npm/node_modules/phantomjs/lib/phantomjs.js"></script>
  <script src="../../AppData/Roaming/npm/node_modules/casperjs/modules/casper.js"></script>
</head>
<body>
<form action="#" id="form" method="get">
  <!-- Page-range form: two numeric fields (number1 = start page, number2 = end page)
       followed by the button that calls myFunction(). -->
  <!-- Each label's "for" now matches the id of its input, so clicking the label focuses the field. -->
  <label for="start">Start Page</label>
  <div>
    <!-- "start" is not a valid value for a number input, so it is shown as a placeholder instead;
         <input> is a void element and takes no closing tag. -->
    <input type="number" id="start" name="number1" placeholder="start">
  </div>
  <label for="end">End Page</label>
  <div>
    <input type="number" id="end" name="number2" placeholder="end">
  </div>
  <button onclick="myFunction()"> Submit </button>
</form>
<script>
/**
 * Click handler for the Submit button.
 *
 * Reads the values of the numeric inputs of the element with id "form",
 * logs them as an array, and registers a step on the global `casper` object
 * that logs the text of `div.info-list-text` and builds the start/end URLs.
 *
 * @returns {undefined}
 */
function myFunction() {
  var form = document.getElementById("form");
  var number = [];
  var i;
  // Select the page fields by type instead of relying on the button being
  // the last control of the form (the old `length - 1` bound).
  for (i = 0; i < form.elements.length; i++) {
    if (form.elements[i].type === "number") {
      number.push(form.elements[i].value);
    }
  }
  console.log(number);
  // NOTE(review): `casper` has to exist as a global when this runs; the
  // commented-out `require('casper').create()` suggests it was meant to be
  // created here - confirm where the instance comes from.
  casper.then(function () {
    console.log(this.fetchText('div.info-list-text'));
    var pageBase = 'http://www.bedbathandbeyond.com/comm/c/Michigan/p/';
    // The old literals ended in the text "number1*3" / "number2*3"; the
    // entered page numbers are now actually multiplied and appended.
    // Presumably the site's page offset is pageNumber * 3 - TODO confirm.
    var startUrl = pageBase + (Number(number[0]) * 3);
    var endUrl = pageBase + (Number(number[1]) * 3);
    // TODO: startUrl and endUrl are computed but not consumed yet.
  });
}
</script>
</body>
</html>
It produces the following error:
casper.js:32 Uncaught ReferenceError: patchRequire is not defined
I think the error occurs because modules cannot be imported in the browser with `require`
the way they can in Node.js. To make this functionality available in the browser, I installed browserify
in my project folder and created the following JS file.
browserReq.js
// Scraper bootstrap: create the CasperJS instance, define the shared state,
// open the listing page and wait for the "next" button before paginating.
var casper = require('casper').create();

// Shared with processPage() and with the casper.then() step further down.
var allLinks = [];
var nextBtn = "a.navigation-button.next";

// Listing URL = fixed prefix + location slug.
var url = 'ok,-MI';
var baseUrl = 'http://www.bedandbeyond.com/comm/c/' + url;
console.log(baseUrl);

casper
  .start(baseUrl)
  .waitForSelector(nextBtn, processPage);

// NOTE(review): run() is called here although another casper.then(...) step is
// registered later in this file - confirm the ordering is intended.
casper.run();
/**
 * CasperJS step callback (invoked with the Casper instance as `this`).
 *
 * Evaluates getPageData in the page, appends the result to the shared
 * `allLinks` array and, while the `nextBtn` selector still exists, clicks it
 * and schedules itself again for the following page.
 */
function processPage() {
  var hrefsOnPage = this.evaluate(getPageData);
  allLinks = allLinks.concat(hrefsOnPage);

  if (this.exists(nextBtn)) {
    var afterClick = this.thenClick(nextBtn);
    var afterEmptyStep = afterClick.then(function () {
      // Step body left empty: the echo/wait calls that used to live here are disabled.
    });
    afterEmptyStep.then(processPage);
  }
}
/**
 * Collects the `href` attribute of every element carrying the class
 * `pro-title` in the current document.
 *
 * @returns {Array} One entry per matching element, in document order
 *   (whatever getAttribute('href') yields for it).
 */
function getPageData() {
  var titleNodes = document.getElementsByClassName('pro-title');
  var hrefs = [];
  // Plain indexed loop: the collection is array-like, not a real Array.
  for (var idx = 0; idx < titleNodes.length; idx++) {
    hrefs.push(titleNodes[idx].getAttribute('href'));
  }
  return hrefs;
}
// Additional step: open every link collected in `allLinks` and dump a small
// record (title, services, location, description) for each profile page.
casper.then(function () {
  this.each(allLinks, function (self, link) {
    self.thenOpen(link, function () {
      // Declared locally; previously this was assigned without a declaration
      // and therefore leaked as an implicit global.
      var jsonObj = {};
      jsonObj.title = this.fetchText('a.profile-full-name');
      jsonObj.services = this.getHTML('div.info-list-text span:nth-child(2) span');
      // getHTML() returns markup, in which an ampersand is serialised as
      // "&amp;". Replacing only "&" produced "andamp;", so the whole entity
      // (or a bare "&") is replaced instead.
      jsonObj.services = jsonObj.services.replace(/&(?:amp;)?/g, "and");
      jsonObj.location = this.getHTML('div.pro-info-horizontal-list div.info-list-label:nth-child(3) div.info-list-text span');
      jsonObj.description = this.getHTML('div.profile-about div:nth-child(1)');
      require('utils').dump(jsonObj);
    });
  });
});
I am running this file using `browserify browserReq.js -o browserReqOut.js -d`.
It gives me the following error: `Cannot find module 'casper' from '<project folder location>'`.
I have installed CasperJS both in the project folder and globally.
UPDATE 1:
I am posting values of the form elements in scrape.html
to the following code,
scrape.php
<?php
/*
 * Read the submitted fields and echo them back.
 * - Missing fields fall back to an empty string instead of raising an
 *   undefined-index notice.
 * - The proxy list is split on any newline style (CRLF, CR or LF); a textarea
 *   submits CRLF, which explode(PHP_EOL, ...) does not split cleanly on a
 *   server whose PHP_EOL is "\n".
 * - Everything echoed is passed through htmlspecialchars() because it comes
 *   straight from the request.
 */
$url = isset($_POST["urlToScrape"]) ? $_POST["urlToScrape"] : '';
?><br>
<?php $page1 = isset($_POST["number1"]) ? $_POST["number1"] : ''; ?> <br>
<?php $page2 = isset($_POST["number2"]) ? $_POST["number2"] : ''; ?><br>
<?php $newProxyList = preg_split('/\r\n|\r|\n/', isset($_POST['proxy']) ? $_POST['proxy'] : ''); ?> <br>
<?php echo htmlspecialchars($url, ENT_QUOTES, 'UTF-8') ?> <br>
<?php echo htmlspecialchars($page1, ENT_QUOTES, 'UTF-8') ?> <br>
<?php echo htmlspecialchars($page2, ENT_QUOTES, 'UTF-8') ?> <br>
<?php echo htmlspecialchars($newProxyList[0], ENT_QUOTES, 'UTF-8') ?> <br>
<?php
/*
 * Writes an inline <script> element into the response. The element's body is
 * the CasperJS scraping script, emitted as one double-quoted PHP string; PHP
 * only prints this text, it does not run the JavaScript.
 * NOTE(review): the emitted script calls require(); the accompanying report
 * says the browser answers with "require is not defined" - confirm whether
 * this script is meant to reach the browser at all.
 */
echo "<script>
var casper = require('casper').create();
var baseUrl = 'http://www.houzz.com/professionals/c/Nashville,-TN';
console.log(baseUrl);
var nextBtn = 'a.navigation-button.next';
var allLinks = [];
casper.start(baseUrl);
casper.waitForSelector(nextBtn, processPage);
casper.run();
function processPage() {
var pageData = this.evaluate(getPageData);
allLinks = allLinks.concat(pageData);
if (!this.exists(nextBtn)) {
return;
}
this.thenClick(nextBtn).then(function() {
this.echo(this.getCurrentUrl());
//this.wait(1000);
}).then(processPage);
}
function getPageData(){
//return document.title;
var links = document.getElementsByClassName('pro-title');
links = Array.prototype.map.call(links,function(link){
return link.getAttribute('href');
});
return links;
}
casper.then(function(){
//require('utils').dump(allLinks);
this.each(allLinks,function(self,link){
this.thenOpen(link,function(a){
jsonObj = {};
jsonObj.title = this.fetchText('a.profile-full-name');
jsonObj.services = this.getHTML('div.info-list-text span:nth-child(2) span');
jsonObj.services = jsonObj.services.replace(/&/g,'and');
jsonObj.location = this.getHTML('div.pro-info-horizontal-list div.info-list-label:nth-child(3) div.info-list-text span');
//jsonObj.contact = this.fetchText('span.pro-contact-text');
jsonObj.description = this.getHTML('div.profile-about div:nth-child(1)');
//jsonObj.description.replace(/\s/g, '');
//require('utils').dump(jsonObj);
//jsonObj = JSON.stringify(jsonObj, null, '\t');
require('utils').dump(jsonObj);
});
});
});
</script>"
?>
</body>
</html>
It still gives me the same error: `Uncaught ReferenceError: require is not defined`.
Why am I getting this error when PHP is executed on the server and the `require`
module is also available on the server?