1

I am trying to make this code work in the browser.

scrape.html

<!doctype html>

<html>
    <head>
        <title> </title>
        <style>
            label {
                margin-bottom: 2%;
            }

            div {
                margin-bottom: 2%;
            }
        </style>
        <script src = "../../AppData/Roaming/npm/node_modules/phantomjs/lib/phantomjs.js"></script>
        <script src = "../../AppData/Roaming/npm/node_modules/casperjs/modules/casper.js"></script>
    </head>

    <body>
        <!--
            Page-range form. myFunction() reads the values of every control
            except the last one, so the Submit button must stay last.
        -->
        <form action="#" id="form" method="get">
            <label for="start">Start Page</label>
            <div>
                <!-- The id ties this field to its label; a number input cannot hold the text "start". -->
                <input type="number" name="number1" id="start">
            </div>
            <label for="end">End Page</label>
            <div>
                <input type="number" name="number2" id="end">
            </div>
            <!-- type="button" runs myFunction() without submitting the form and reloading the page. -->
            <button type="button" onclick="myFunction()"> Submit  </button>
        </form>
        <script> 
            /**
             * Click handler for the Submit button: collects the value of every
             * control in the form except the last one, logs the collected
             * values, then queues a step on the global `casper` object.
             *
             * Nothing in this function creates `casper`; it must already exist
             * as a global when the handler runs.
             */
            function myFunction() {
                var formEl = document.getElementById("form");
                var fieldValues = [];
                // Stop one short of the end: the final control is the Submit button.
                var stopAt = formEl.length - 1;
                var idx = 0;
                while (idx < stopAt) {
                    fieldValues.push(formEl.elements[idx].value);
                    idx += 1;
                }
                console.log(fieldValues);

                casper.then(function () {
                    console.log(this.fetchText('div.info-list-text'));

                    // Both URLs are fixed string literals (the page numbers are not
                    // substituted into them) and neither variable is read afterwards.
                    var startUrl = 'http://www.bedbathandbeyond.com/comm/c/Michigan/p/number1*3';
                    var endUrl = 'http://www.bedbathandbeyond.com/comm/c/Michigan/p/number2*3';
                });
            }
        </script>
    </body>
</html>

It produces the following error:

casper.js:32 Uncaught ReferenceError: patchRequire is not defined

I think the error occurs because the browser has no `require`, so modules can't be imported there the way they can in Node.js. To make this functionality available in the browser, I installed browserify in my project folder and created the following JS file.

browserReq.js

// Create the CasperJS instance used by every step in this script.
var casper = require('casper').create();

// Fragment appended to the listing path to form the start URL.
var url = 'ok,-MI'
var baseUrl = 'http://www.bedandbeyond.com/comm/c/'+url;
console.log(baseUrl);

// Selector for the link that processPage() checks for and clicks to advance a page.
var nextBtn = "a.navigation-button.next";

// Accumulates the values returned by getPageData() across all visited pages.
var allLinks = [];

// Open the start URL.
casper.start(baseUrl);

// Once the pagination link is present, start collecting links with processPage().
casper.waitForSelector(nextBtn, processPage);

casper.run();

/**
 * Casper step callback (invoked with `this` bound to the casper instance).
 *
 * Appends the links found on the current page to `allLinks`. If the
 * pagination link is still present, clicks it and schedules itself again
 * for the following page.
 */
function processPage() {
  var hrefsOnPage = this.evaluate(getPageData);
  allLinks = allLinks.concat(hrefsOnPage);

  var hasNextPage = this.exists(nextBtn);
  if (hasNextPage) {
    var afterClick = this.thenClick(nextBtn);
    // Intentionally empty step between the click and the next round.
    var afterEmptyStep = afterClick.then(function() {});
    afterEmptyStep.then(processPage);
  }
}

/**
 * Collects the `href` attribute of every element with class `pro-title`
 * in the current document.
 *
 * Passed to `casper.evaluate()` by processPage(), so it only uses the
 * page's own `document`.
 *
 * @returns {Array} one `href` attribute value per matching element, in document order
 */
function getPageData(){
  var titleNodes = document.getElementsByClassName('pro-title');
  var hrefs = [];
  for (var idx = 0; idx < titleNodes.length; idx++) {
    hrefs.push(titleNodes[idx].getAttribute('href'));
  }
  return hrefs;
}

/**
 * Final step: opens every collected link in turn and dumps the fields read
 * from that page (title, services, location, description).
 */
casper.then(function(){
  this.each(allLinks,function(self,link){
    this.thenOpen(link,function(){
      // Declared with `var` so the object stays local to this callback
      // instead of leaking into the global scope.
      var jsonObj = {};
      jsonObj.title = this.fetchText('a.profile-full-name');

      jsonObj.services = this.getHTML('div.info-list-text span:nth-child(2) span');
      // getHTML returns markup, so ampersands arrive HTML-encoded.
      jsonObj.services = jsonObj.services.replace(/&amp;/g,"and");

      jsonObj.location = this.getHTML('div.pro-info-horizontal-list div.info-list-label:nth-child(3) div.info-list-text span');
      jsonObj.description = this.getHTML('div.profile-about div:nth-child(1)');

      require('utils').dump(jsonObj);
    });
  });
});

I am running this file using browserify browserReq.js -o browserReqOut.js -d.

It gives me the following error: Cannot find module 'casper' from '<project folder location>'. I have installed CasperJS both in the project folder and globally.

UPDATE 1:

I am posting values of the form elements in scrape.html to the following code,

scrape.php

<?php
/*
 * Read the submitted form fields. Each one falls back to an empty string so
 * that a missing field does not raise an "undefined index" notice.
 */
$url = isset($_POST["urlToScrape"]) ? $_POST["urlToScrape"] : '';
$page1 = isset($_POST["number1"]) ? $_POST["number1"] : '';
$page2 = isset($_POST["number2"]) ? $_POST["number2"] : '';
// Browsers normalise line breaks in submitted form text to CRLF, which
// PHP_EOL does not match on every server; \R splits on any newline sequence.
$newProxyList = preg_split('/\R/', isset($_POST['proxy']) ? $_POST['proxy'] : '');
?>
<br>
<br>
<br>
<br>

<?php /* The values come straight from the request, so escape them before writing them into the HTML. */ ?>
<?php echo htmlspecialchars($url, ENT_QUOTES, 'UTF-8'); ?> <br>
<?php echo htmlspecialchars($page1, ENT_QUOTES, 'UTF-8'); ?> <br>
<?php echo htmlspecialchars($page2, ENT_QUOTES, 'UTF-8'); ?> <br>
<?php echo htmlspecialchars($newProxyList[0], ENT_QUOTES, 'UTF-8'); ?> <br>

<?php /*
 * NOTE(review): echo only writes the text below into the HTML response; PHP
 * does not run the JavaScript. The CasperJS script is therefore delivered to
 * the browser as an inline <script> element. The submitted values ($url,
 * $page1, $page2, $newProxyList) are not used anywhere inside this string.
 */ echo "<script> 

    var casper = require('casper').create();

var baseUrl = 'http://www.houzz.com/professionals/c/Nashville,-TN';
console.log(baseUrl);

var nextBtn = 'a.navigation-button.next';

var allLinks = [];

casper.start(baseUrl);

casper.waitForSelector(nextBtn, processPage);

casper.run();

function processPage() {
  var pageData = this.evaluate(getPageData);
  allLinks = allLinks.concat(pageData);

  if (!this.exists(nextBtn)) {
    return;
  }

  this.thenClick(nextBtn).then(function() {
    this.echo(this.getCurrentUrl());
    //this.wait(1000);
  }).then(processPage);
}

function getPageData(){
  //return document.title;
  var links = document.getElementsByClassName('pro-title');
  links = Array.prototype.map.call(links,function(link){
    return link.getAttribute('href');
  });
  return links;
}

casper.then(function(){
  //require('utils').dump(allLinks);
  this.each(allLinks,function(self,link){
    this.thenOpen(link,function(a){
      jsonObj = {};
      jsonObj.title = this.fetchText('a.profile-full-name');

      jsonObj.services = this.getHTML('div.info-list-text span:nth-child(2) span');
      jsonObj.services = jsonObj.services.replace(/&amp;/g,'and');  

      jsonObj.location = this.getHTML('div.pro-info-horizontal-list div.info-list-label:nth-child(3) div.info-list-text span');
      //jsonObj.contact = this.fetchText('span.pro-contact-text');
      jsonObj.description = this.getHTML('div.profile-about div:nth-child(1)');  
      //jsonObj.description.replace(/\s/g, '');   

      //require('utils').dump(jsonObj);   

      //jsonObj = JSON.stringify(jsonObj, null, '\t');
      require('utils').dump(jsonObj);
    });
  });
});

 </script>"
 ?>

</body>
</html>

It still gives me the same error: Uncaught ReferenceError: require is not defined. Why am I getting this error when PHP is executed on the server and the require module is also available on the server?

  • what does your package.json file look like? – CodingWithSpike Feb 12 '16 at 19:41
  • Just a guess, but it would seem that casperJS has dependencies that aren't available in a browser. I can't see why you would want to do this. casperJS is for testing web sites using Node.JS. Why not just use Node.js? – Dave Bush Feb 12 '16 at 20:20

1 Answer

0

PhantomJS is a full browser, which has its own API. CasperJS uses that API to do stuff. Unless you implement the full PhantomJS API in plain JavaScript in the browser, you're not going to be able to browserify CasperJS.

Artjom B.
  • 61,146
  • 24
  • 125
  • 222
  • I want to go to a page on my server (`page.html`) which asks the user to input the url he wants to scrape. I then want that url to be passed to my casperJS script and start the scrape. Do you have any ideas as to how I could execute this? –  Feb 12 '16 at 21:33
  • Pass it to some kind of server (PHP, Python, Java, Node.js, ASP.NET, ...) and let the server execute the CasperJS script. – Artjom B. Feb 13 '16 at 08:09
  • That's one way PHP code can be written, but it's not advisable to do it this way nowadays (there are a lot of templating frameworks out there). You can use whatever server technology you want. You can call the server method however you like. Also, have a look at [What is the difference between client-side and server-side programming?](http://stackoverflow.com/questions/13840429/what-is-the-difference-between-client-side-and-server-side-programming) – Artjom B. Feb 14 '16 at 09:59
  • Can you take a look at the `Update 1` section of my question. –  Feb 15 '16 at 17:07
  • This doesn't make any sense. As I said, a CasperJS script cannot run directly in the browser, but you can make a call to the server (Ajax or form submit) and let the script execute on the server through [`shell_exec()`](http://php.net/manual/en/function.shell-exec.php) or similar if you decide to use PHP. – Artjom B. Feb 15 '16 at 17:11
  • Sorry I have never used PHP before. With your help, I have figured out how to execute the casperJS script on the server from PHP. Do you know how to make variables in my PHP script be available in the casperJS script? –  Feb 15 '16 at 18:11
  • You can pass command-line arguments to your CasperJS script, but you can't directly access variables that are in some other process. Have a look at the `casper.cli` module. There are also multiple questions here on Stack Overflow about that. – Artjom B. Feb 15 '16 at 18:17