0

I use simple_html_dom to parse HTML; the following is my core code:

// Allow the whole script to run for up to 10000 seconds.
set_time_limit(10000);
foreach ($urlList as $url) {
    // Set the default socket timeout to 5 seconds (re-applied on every iteration).
    ini_set('default_socket_timeout', 5);

    // Build a stream context that asks for an HTTP GET with a 5-second timeout.
    $context = stream_context_create(
        array(
            'http'=>array(
                'method' => 'GET', 
                'timeout' => 5
            ),
        )
    );
    // NOTE(review): $shd is presumably a simple_html_dom instance created elsewhere — confirm.
    // Load the document at $url, handing the context over as the third argument.
    $shd->load_file($url, false, $context);

    // Debug marker: printed once the load call has returned.
    var_dump(0);

    // Query the loaded document for "table" and keep the result in $html.
    $html = $shd->find("table");

    ...
}

But the timeout has no effect on load_file(); the script only stops once the 10000 seconds set by set_time_limit(10000) have elapsed.

I want load_file() to give up and move on to the next task when the current one takes longer than 5 seconds. Is there a way to achieve that?

Jack Sun
  • 277
  • 1
  • 3
  • 14

1 Answers1

0

In the end I used curl to fetch the content, and then used simple_html_dom to process it.

/**
 * Download a URL with cURL, applying the same time limit to the connection
 * phase and to the transfer as a whole.
 *
 * Redirects are followed. Only a final HTTP status of 200 counts as success.
 *
 * @param string $url     Address to fetch.
 * @param int    $timeout Limit in seconds, used for both CURLOPT_CONNECTTIMEOUT
 *                        and CURLOPT_TIMEOUT.
 *
 * @return string|false Response body on success; false when the handle cannot
 *                      be created, the transfer fails or times out, or the
 *                      final status code is not 200.
 */
function get_html_by_curl($url, $timeout = 5) {
    $ch = curl_init();
    if (false === $ch) {
        // Keep the "false on failure" contract rather than handing an
        // invalid handle to curl_setopt().
        return false;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HEADER, false);
    // Return the body from curl_exec() instead of printing it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);

    // Bound both the connect phase and the complete transfer.
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);

    $html = curl_exec($ch);
    if (false === $html) {
        return false;
    }

    // Strict integer comparison: anything other than exactly 200 is a failure.
    if (200 !== (int) curl_getinfo($ch, CURLINFO_HTTP_CODE)) {
        return false;
    }

    return $html;
}
 // First attempt, then up to 3 retries while the result is falsy
 // (false on failure, or an empty body).
 $content = get_html_by_curl('http://www.google.com', 5);
 for ($i = 0; $i < 3 && !$content; $i++) {
     $content = get_html_by_curl('http://www.google.com', 5);
 }

 // Check the downloaded content itself: the original tested $html, which is
 // undefined at this scope, so the guard always passed.
 if (false !== $content) {
     $shd->load($content);
 }
Jack Sun
  • 277
  • 1
  • 3
  • 14