1

I'm looking for a way to make the following script scrape multiple pages whose URLs are stored in an array, and write the selected content to a text file or an Excel document.

Any ideas? Is this possible? And another question would be why the script works on localhost but not when placed on the server. Thank you.

<?php
include_once('simple_html_dom.php');
/**
 * Scrape one or more pages and extract the title and detail cells of every
 * table found directly under <body>.
 *
 * Pages that cannot be loaded are skipped, and cells that are missing from a
 * table yield an empty string, so the function always returns an array.
 *
 * @param string|string[] $urls A single URL or a list of URLs to scrape.
 *                              Defaults to the page the script was written for.
 *
 * @return array[] One entry per table found, each an associative array with
 *                 the keys 'titlu' and 'tr2' .. 'tr12' holding trimmed plain text.
 */
function scraping($urls = 'http://lmvz.anofm.ro:8080/lmv/detalii.jsp?UNIQUEJVID=50/01/1150001435/1&judet=50') {
    // Map of output field => selector, relative to each table.
    $selectors = ['titlu' => '/tbody/tr[1]/td/div'];
    for ($row = 2; $row <= 11; $row++) {
        $selectors['tr' . $row] = '/tbody/tr[' . $row . ']/td[2]';
    }
    $selectors['tr12'] = '/tbody/tr[12]/td/div';

    $ret = [];

    // Accept a single URL as well as a list of URLs.
    foreach ((array) $urls as $url) {
        // create HTML DOM
        $html = file_get_html($url);

        // Skip pages that could not be fetched or parsed instead of failing.
        if (!$html || !is_object($html) || !isset($html->nodes)) {
            continue;
        }

        // get article block
        foreach ($html->find('/html/body/table') as $article) {
            // Start from a clean record so values never leak between tables.
            $item = [];

            foreach ($selectors as $field => $selector) {
                // find() with an index returns null when nothing matches.
                $node = $article->find($selector, 0);
                $item[$field] = $node ? trim($node->plaintext) : '';
            }

            $ret[] = $item;
        }

        // clean up memory
        $html->clear();
        unset($html);
    }

    return $ret;
}
// Print every scraped record, one field per line, inside a <pre> block.
echo '<pre>';
$ret = scraping();

// scraping() may not hand back an array (for example when the page could not
// be loaded); treat that as "no records" rather than iterating a non-array.
if (!is_array($ret)) {
    $ret = [];
}

// Fields are printed in this fixed order for every record.
$fields = ['titlu', 'tr2', 'tr3', 'tr4', 'tr5', 'tr6', 'tr7', 'tr8', 'tr9', 'tr10', 'tr11', 'tr12'];

foreach ($ret as $v) {
    foreach ($fields as $field) {
        // A missing field prints as an empty line.
        echo (isset($v[$field]) ? $v[$field] : '').'<br>';
    }
}
?>
Jay Blanchard
  • 34,243
  • 16
  • 77
  • 119
Ene Mihai
  • 42
  • 5
  • What is the result of $html when it's deployed to the server? – zeflex May 07 '15 at 22:21
  • When placed on server, it turns up blank. Otherwise on localhost the script works perfectly. – Ene Mihai May 07 '15 at 22:26
  • Try curl instead of file_get_html : http://stackoverflow.com/questions/18667441/simple-html-dom-file-get-html-not-working-is-there-any-workaround – zeflex May 07 '15 at 22:27

1 Answers1

1

Curl would give you cleaner output. You could also try using Guzzle to make the code easier to write; it should support all the functionality you need.

In terms of writing to Excel, there is a great PHP library for that: PHPExcel.

Or, if you want to write to a text file, that's even easier: just combine all the text you want into a string and run file_put_contents('PATHTOFILE', $multiPageContents);. Just make sure the file path you are pointing to is writable by the user running PHP.