8

I want to create a crawler who take the informations of all pages one by one from number 0 to 10 000 000. I don't matter how much time it takes. I just want it works. Here is the error i obtain

Fatal error: Maximum function nesting level of '100' reached, aborting! in D:\wamp\www\crawler\index.php on line 25

The line 25 is

$htmlstr = (string)$this->curlGet($url);

And there is my complete script.

Thank you for your help !

header('Content-Type: text/html; charset=utf-8'); 
ini_set('max_input_nesting_level','100000');
ini_set('max_execution_time','100000'); 

class crawler{

    private $url;
    private $page;
    private $bothurl;
    private $innerDom = null;
    public $prop;
    public $entry;

    function __construct($entry){
        $this->entry = $entry;
        $this->bothurl = array('http://www.remax-quebec.com/fr/inscription/Q/'.$entry.'.rmx','http://www.remax-quebec.com/en/inscription/Q/'.$entry.'.rmx');
        $this->scan();
    }

    private function scan(){
        $i =0;
        foreach($this->bothurl as $url){
            $this->url = $url;
            $this->lang = ($i==0)?'fr':'en';
            $htmlstr = (string)$this->curlGet($url);
            $dom = new DOMDocument;
            @$dom->loadHTML($htmlstr);
            $this->page = $dom;
            $this->htmlInfos();
            $this->getInfos();
            $i++;
        }
    }

    private function htmlInfos(){
        $divs = $this->page->getElementsByTagName('div');
        foreach($divs as $div){
            if($div->hasAttribute('class') && $div->getAttribute('class') == 'bloc specs'){
                $innerDom = new DOMDocument(); 
                @$innerDom->loadHTML($this->innerHTML($div));
                $this->innerDom = $innerDom;
            }
        }
        if($this->innerDom === null) $this->changeEntry();
    }

    private function getInfos(){
        $sect = 0;

        foreach($this->innerDom->getElementsByTagName('div') as $div){
        # obtenir la description
             $this->getDesc($div->getAttribute('class'),$div);
        # obtenir les caractéristiques
             $this->getCaract($div->getAttribute('class'),$div);
        # obtenir les informations interieur, exterieur et evaluation
             if($div->getAttribute('class') == 'section deux-colonnes'){
                switch($sect){
                    case 0: $this->getSpecInfos($div,'interieur'); break;
                    case 1: $this->getSpecInfos($div,'exterieur'); break;
                    case 2: $this->getSpecInfos($div,'evaluation'); break;
                    case 3: $this->getSpecInfos($div,'equipement'); break;
                    case 4: $this->getSpecInfos($div,'services'); break;
                }
                $sect++;
             }else if($div->getAttribute('class') == 'section'){
        # obtenir les détails des pièces
                foreach($div->getElementsByTagName('table') as $table){
                    if($table->getAttribute('class') == 'details-pieces'){
                        $this->detailPieces($table);
                    }
                }
             }
        }
    }

    private function getDesc($class,$obj){
        if($class == 'section description'){
            $p = $obj->getElementsByTagName('p')->item(0);
            $text = (string)$p->nodeValue;
            $this->prop[$this->lang]['description'] = $text;
        }
    }

    private function getCaract($class,$obj){
        if($class == 'section characteristiques'){
            foreach($obj->getElementsByTagName('div') as $div){
                if(substr($div->getAttribute('class'),0,4) == "item"){
                    $text = (string)$div->nodeValue;
                    $this->prop[$this->lang]['caracteritiques'][substr($div->getAttribute('class'),5)] = $text;
                }
            }
        }
    }

    private function getSpecInfos($obj,$nomInfo){
        foreach($obj->getElementsByTagName('table') as $table){
            foreach($table->getElementsByTagName('tr') as $tr){
                $name = $tr->getElementsByTagName('td')->item(0);
                $value = $tr->getElementsByTagName('td')->item(1);
                $name = substr((string)$name->nodeValue,0,-2);
                $value = (string)$value->nodeValue;
                $this->prop[$this->lang][$nomInfo][$this->noAccents($name)] = $value;
            }
        }
    }

    private function detailPieces($obj){
        $tbody = $obj->getElementsByTagName('tbody')->item(0);
        foreach($tbody->getElementsByTagName('tr') as $tr){
            $name = $tr->getElementsByTagName('td')->item(0);
            $name = (string)$name->nodeValue;
            $level = $tr->getElementsByTagName('td')->item(1);
            $level = (string)$level->nodeValue;
            $dimensions = $tr->getElementsByTagName('td')->item(2);
            $dimensions = (string)$dimensions->nodeValue;
            $floor = $tr->getElementsByTagName('td')->item(3);
            $floor = (string)$floor->nodeValue;
            $desc = $tr->getElementsByTagName('td')->item(4);
            $desc = (string)$desc->nodeValue;

            $this->prop[$this->lang]['pieces'][$this->noAccents($name)]['etage'] = $level;
            $this->prop[$this->lang]['pieces'][$this->noAccents($name)]['dimensions'] = $dimensions;
            $this->prop[$this->lang]['pieces'][$this->noAccents($name)]['revetement'] = $floor;
            $this->prop[$this->lang]['pieces'][$this->noAccents($name)]['description'] = $desc;
        }
    }

    private function innerHTML($element){ 
        $innerHTML = ""; 
        $children = $element->childNodes; 
        foreach ($children as $child) 
        { 
            $tmp_dom = new DOMDocument(); 
            $tmp_dom->appendChild($tmp_dom->importNode($child, true)); 
            $innerHTML.=trim($tmp_dom->saveHTML()); 
        } 
        return $innerHTML; 
    }

    private function noAccents($value){
        $string= strtr($chaine,"ÀÁÂÃÄÅàáâãäåÒÓÔÕÖØòóôõöøÈÉÊËèéêëÇçÌÍÎÏìíîïÙÚÛÜùúûüÿÑñ","aaaaaaaaaaaaooooooooooooeeeeeeeecciiiiiiiiuuuuuuuuynn");
    }

    private function changeEntry(){
        $this->entry++;
        echo $this->entry;
        $this->scan();
    }

    private function curlGet($url){
        $curl = curl_init();
        curl_setopt($curl, CURLOPT_URL, $url);
        curl_setopt($curl, CURLOPT_ENCODING, "gzip");
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);
        $data = curl_exec($curl);
        curl_close($curl);
        return $data;
    }
}

$entry = 8678057;

$crawler = new crawler($entry);

echo '<pre>';
print_r($crawler->prop);
echo '</pre>';
Dustin Graham
  • 2,054
  • 1
  • 20
  • 24
Mike Boutin
  • 5,297
  • 12
  • 38
  • 65
  • 3
    I don't really see effort here to try to understand the issue. – simchona Apr 14 '12 at 21:51
  • 3
    You've got an error. That's nice. You've posted code. That's also nice, but where is your question? Do you want us to magically fix your code and post a working solution here? – Bojangles Apr 14 '12 at 21:51
  • No, i just want to know what's wrong with my code. – Mike Boutin Apr 14 '12 at 23:05
  • If you only want examples of crawling, have a look at http://scraperwiki.com. It is a site collecting user-contributed scraping scripts in a few languages (php,ruby,python) and gives a re-usable format from their site. I think it may be usefull to you. If you are not programmer yourself, you may find working and concrete examples there. – renoirb Jan 11 '13 at 16:54
  • 1
    Don't do this recursively. [Rewrite it iteratively](http://stackoverflow.com/questions/1549943/design-patterns-for-converting-recursive-algorithms-to-iterative-ones). – Matt Ball Apr 14 '12 at 21:58
  • If you write code, you're a programmer. Don't be so hard on yourself. Yes, the easier way is [Martin.'s answer](http://stackoverflow.com/a/10157667/139010), but it's not (what I would call) the Right Answer. – Matt Ball Apr 14 '12 at 22:54
  • i'm not very good in maths, i'm not a programmer, i am a web integrator. Do you have a easier way in php ? Thank you for your help ^^ – Mike Boutin Apr 14 '12 at 22:58
  • But i want to learn how to make better coding. U have a tutorial where i can learn that ? – Mike Boutin Apr 14 '12 at 22:59
  • Did you read the answers to the question I linked? – Matt Ball Apr 14 '12 at 23:10
  • Yes, but it's not in php, i fund a post in another blog where it explain it in php ! Thank you very much man – Mike Boutin Apr 15 '12 at 00:14
  • its because of the xdebug setting. see the other answer with: ini_set('xdebug.max_nesting_level', $limit) – c33s Oct 25 '12 at 12:36

4 Answers4

33

Assuming you're using xdebug, you can set your own limit with

ini_set('xdebug.max_nesting_level', $limit)
Martin.
  • 10,494
  • 3
  • 42
  • 68
  • Using assetic with a plugin for less.php in laravel 4 to compile bootstrap, I hit the 100 max error, increasing to 200 allows it to run. Seems legit. – Dustin Graham Feb 21 '14 at 23:42
3

changes the file /etc/mysql/my.cnf parameter to something like that max_allowed_packet = 512M

Get sure you've got xdebug installed (use phpinfo()) and then change the file /etc/php5/fpm/php.ini adding or editing the line : xdebug.max_nesting_level=1000

Restart both services sudo service mysql restart sudo service php5-fpm restart

If it doesn't work you can still set those two parameters to false at /etc/php5/fpm/php.ini xdebug.remote_autostart=0 xdebug.remote_enable=0

medina
  • 8,051
  • 4
  • 25
  • 24
1

In my case, it was related to composer. Some vendors were updated in the composer.json file, but I forgot to run the commands composer update nor composer install. The system generated a cascade of errros, which was causing this 'maximum nested level'.

After executing those commands, the problem was fixed

Nicolas
  • 1,320
  • 2
  • 16
  • 28
0

Assuming you didn't make a drop-dead halt mistake, just change the limit of xdebug.

I sloved this problem by changed the xdebug.ini file. (In my mac, the path is /usr/local/php5-5.6.17-20160108-103504/php.d/50-extension-xdebug.ini, maybe yours would be a little different. )

Add a new line at the bottom of xdebug.ini file:

xdebug.max_nesting_level=500


Remember: You must change the xdebug.ini corresponding to the php you are using. For example, if you installed php5 and xampp in your computer, you need to figure out which php you're using.

Frank Miller
  • 376
  • 3
  • 5