0

I am sending cURL requests to a batch of 50 sites from a cron job to check whether they are up. However, on every run I only manage to get responses from the first 10-15 sites. I suspect that after that point the server gets overloaded and stops executing.

Example Code:

      // Check every configured site in turn and record the ones that respond.
      foreach ($sites as $site) {
          // Pass the current entry to Visit() instead of a fixed placeholder URL;
          // assumes each $sites entry is an absolute URL string — TODO confirm.
          if (Visit($site)) {
              //saving in database
          }
      }

/**
 * Checks whether a site answers an HTTP GET request with a successful status.
 *
 * Redirects are followed (at most 5 hops), so a site that merely redirects,
 * for example to its "www." host or to HTTPS, is judged by the status of the
 * final response instead of being reported as down.
 *
 * @param string $url Absolute URL to request.
 *
 * @return bool True when the last HTTP status received is in the 2xx range;
 *              false for any other status, including 0 when no response
 *              arrived within the 5 second timeout.
 */
function Visit($url)
{
    $ch = curl_init();
    if ($ch === false) {
        // No handle could be created, so nothing can be requested.
        return false;
    }

    // The redirect options are listed last: curl_setopt_array() stops at the
    // first option it cannot set, and FOLLOWLOCATION is the one most likely to
    // be refused by restrictive PHP configurations.
    curl_setopt_array($ch, array(
        CURLOPT_URL            => $url,
        CURLOPT_USERAGENT      => "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)",
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_VERBOSE        => false,
        CURLOPT_TIMEOUT        => 5,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_MAXREDIRS      => 5,
    ));

    // Only the status code matters; the returned body is intentionally discarded.
    curl_exec($ch);
    $httpcode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);

    return $httpcode >= 200 && $httpcode < 300;
}

Please suggest some techniques to avoid overloading the server so that I get a response from every site.

user1836842
  • 13
  • 1
  • 3

1 Answers1

7

You can try the following, which uses curl_multi_exec; it takes only 15.519232988358 seconds to check 100 different domains.

// Sample workload: one bare host name (no scheme) per line.
$url = <<<'HOSTS'
google.com
facebook.com
youtube.com
yahoo.com
baidu.com
wikipedia.org
live.com
twitter.com
qq.com
amazon.com
taobao.com
linkedin.com
blogspot.com
google.co.in
yahoo.co.jp
sina.com.cn
google.de
yandex.ru
msn.com
wordpress.com
google.co.jp
google.com.hk
bing.com
ebay.com
google.co.uk
google.fr
vk.com
microsoft.com
babylon.com
weibo.com
googleusercontent.com
163.com
tumblr.com
apple.com
mail.ru
pinterest.com
soso.com
google.com.br
tmall.com
google.es
paypal.com
google.ru
google.it
xhamster.com
craigslist.org
sohu.com
blogger.com
fc2.com
imdb.com
go.com
ebay.de
google.co.id
zedo.com
alibaba.com
mywebsearch.com
google.com.tr
adf.ly
stackoverflow.com
blogspot.in
redtube.com
amazon.co.uk
360buy.com
google.com.au
alipay.com
sogou.com
about.com
instagram.com
ebay.co.uk
nytimes.com
livedoor.com
google.pl
netflix.com
imgur.com
uol.com.br
dailymotion.com
wordpress.org
360.cn
cnet.com
godaddy.com
youporn.com
bp.blogspot.com
ameblo.jp
mediafire.com
adcash.com
globo.com
chinaz.com
weather.com
incredibar.com
neobux.com
xnxx.com
google.nl
ehow.com
douban.com
google.com.sa
4shared.com
vimeo.com
livejournal.com
dropbox.com
renren.com
doubleclick.com
HOSTS;

// Open a <pre> element so the plain-text report keeps its line breaks in a browser.
echo "<pre>";

// A limit of 0 removes PHP's execution time limit for the batch.
set_time_limit(0);

// Turn the newline-separated list into an array with one host per element.
$url = explode("\n", $url);

// Time the whole batch and print the elapsed seconds on its own line.
$start = microtime(true);
multiplePost($url);
$elapsed = microtime(true) - $start;
echo PHP_EOL, $elapsed;

Output

Status 301  for : http://www.google.com/ Error (x)
Status 302  for : https://facebook.com/ Error (x)
Status ok for : http://www.youtube.com/ Done (^)
Status ok for : http://www.yahoo.com/ Done (^)
Status ok for : http://baidu.com Done (^)
Status ok for : http://www.wikipedia.org/ Done (^)
Status 301  for : https://home.live.com/ Error (x)
Status ok for : http://twitter.com Done (^)
Status ok for : http://www.qq.com/ Done (^)
Status ok for : http://www.amazon.com/ Done (^)
Status ok for : http://www.taobao.com/ Done (^)
Status ok for : http://www.linkedin.com/ Done (^)
Status 302  for : https://accounts.google.com/ServiceLogin?service=blogger&passive=1209600&continue=http://www.blogger.com/home&followup=http://www.blogger.com/home&ltmpl=start Error (x)
Status ok for : http://www.google.co.in/ Done (^)
Status ok for : http://www.yahoo.co.jp/ Done (^)
Status 0  for : http://sina.com.cn Error (x)
Status ok for : http://www.google.de/ Done (^)
Status 301  for : http://www.yandex.ru/ Error (x)
Status ok for : http://www.msn.com/ Done (^)
Status ok for : http://wordpress.com Done (^)
Status ok for : http://www.google.co.jp/ Done (^)
Status ok for : http://www.google.com.hk/ Done (^)
Status ok for : http://www.bing.com/ Done (^)
Status 301  for : http://www.ebay.com Error (x)
Status ok for : http://www.google.co.uk/ Done (^)
Status ok for : http://www.google.fr/ Done (^)
Status ok for : http://vk.com Done (^)
Status ok for : http://www.microsoft.com/en-ng/default.aspx Done (^)
Status ok for : http://www.babylon.com/ Done (^)
Status ok for : http://weibo.com Done (^)
Status 0  for : http://googleusercontent.com Error (x)
Status 0  for : http://163.com Error (x)
Status 302  for : https://www.tumblr.com/ Error (x)
Status ok for : http://www.apple.com/ Done (^)
Status ok for : http://mail.ru Done (^)
Status ok for : http://pinterest.com Done (^)
Status 0  for : http://soso.com Error (x)
Status ok for : http://www.google.com.br/ Done (^)
Status ok for : http://www.tmall.com/ Done (^)
Status ok for : http://www.google.es/ Done (^)
Status 302  for : https://paypal.com/ Error (x)
Status ok for : http://www.google.ru/ Done (^)
Status ok for : http://www.google.it/ Done (^)
Status ok for : http://xhamster.com Done (^)
Status ok for : http://www.craigslist.org/about/sites/ Done (^)
Status 302  for : http://www.sohu.com/ Error (x)
Status 302  for : https://accounts.google.com/ServiceLogin?service=blogger&passive=1209600&continue=http://www.blogger.com/home&followup=http://www.blogger.com/home&ltmpl=start Error (x)
Status ok for : http://fc2.com Done (^)
Status ok for : http://www.imdb.com/ Done (^)
Status ok for : http://go.com Done (^)
Status 301  for : http://www.ebay.de Error (x)
Status ok for : http://www.google.co.id/ Done (^)
Status ok for : http://www.zedo.com/ Done (^)
Status ok for : http://www.alibaba.com/ Done (^)
Status ok for : http://home.mywebsearch.com/ Done (^)
Status ok for : http://www.google.com.tr/ Done (^)
Status ok for : http://adf.ly Done (^)
Status ok for : http://stackoverflow.com Done (^)
Status 302  for : http://www.google.com/ Error (x)
Status ok for : http://www.redtube.com/ Done (^)
Status ok for : http://www.amazon.co.uk/ Done (^)
Status ok for : http://360buy.com Done (^)
Status ok for : http://www.google.com.au/ Done (^)
Status 301  for : https://www.alipay.com/?src=alipay.com Error (x)
Status ok for : http://www.sogou.com/ Done (^)
Status ok for : http://www.about.com/ Done (^)
Status ok for : http://instagram.com Done (^)
Status 301  for : http://www.ebay.co.uk Error (x)
Status ok for : http://www.nytimes.com/ Done (^)
Status ok for : http://www.livedoor.com/ Done (^)
Status ok for : http://www.google.pl/ Done (^)
Status 301  for : http://www.netflix.com/ Error (x)
Status ok for : http://imgur.com Done (^)
Status ok for : http://www.uol.com.br/ Done (^)
Status 301  for : http://www.dailymotion.com/ Error (x)
Status ok for : http://wordpress.org Done (^)
Status ok for : http://360.cn Done (^)
Status ok for : http://www.cnet.com/ Done (^)
Status ok for : http://www.godaddy.com/ Done (^)
Status ok for : http://www.youporn.com/ Done (^)
Status 0  for : http://bp.blogspot.com Error (x)
Status ok for : http://ameblo.jp Done (^)
Status ok for : http://www.mediafire.com/ Done (^)
Status 301  for : https://www.adcash.com/index.php Error (x)
Status 301  for : http://www.globo.com/ Error (x)
Status ok for : http://chinaz.com Done (^)
Status ok for : http://www.weather.com/ Done (^)
Status ok for : http://incredibar.com/essentials/homepage Done (^)
Status ok for : http://www.neobux.com/ Done (^)
Status 301  for : http://www.xnxx.com/ Error (x)
Status ok for : http://www.google.nl/ Done (^)
Status ok for : http://www.ehow.com/ Done (^)
Status 0  for : http://douban.com Error (x)
Status ok for : http://www.google.com.sa/ Done (^)
Status 301  for : http://www.4shared.com Error (x)
Status ok for : http://vimeo.com Done (^)
Status ok for : http://www.livejournal.com/ Done (^)
Status 302  for : https://www.dropbox.com/ Error (x)
Status ok for : http://renren.com Done (^)
Status ok for : http://www.google.com/doubleclick/ Done (^)

15.519232988358 <--------------- Total Time Taken

Function Used

/**
 * Requests every host in $nodes concurrently and prints one status line per host.
 *
 * Each entry is trimmed and fetched as "http://<host>" with redirects
 * followed, a 5 second connect timeout and a 15 second overall timeout.
 * A final HTTP status of 200 is printed as "Status ok"; any other status is
 * printed with its code, including 0 for a host that produced no HTTP
 * response at all.
 *
 * @param array $nodes Host names without a scheme. Entries that are empty
 *                     after trimming are skipped.
 *
 * @return void
 */
function multiplePost($nodes) {
    $mh = curl_multi_init();
    $curl_array = array();
    foreach ( $nodes as $i => $domain ) {
        $domain = trim($domain);
        if ($domain === '') {
            // A blank line in the list would otherwise be requested as "http://".
            continue;
        }

        $ch = curl_init("http://$domain");
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2 (.NET CLR 3.5.30729)');
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
        curl_setopt($ch, CURLOPT_TIMEOUT, 15);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
        curl_multi_add_handle($mh, $ch);
        $curl_array[$i] = $ch;
    }

    echo PHP_EOL, PHP_EOL;

    // Drive all transfers to completion. Between rounds, block in
    // curl_multi_select() until a socket has activity instead of polling on a
    // fixed sleep.
    $running = 0;
    do {
        do {
            $status = curl_multi_exec($mh, $running);
        } while ( $status === CURLM_CALL_MULTI_PERFORM );

        if ($status !== CURLM_OK) {
            // The multi stack reported an error; stop driving and report what we have.
            break;
        }

        if ($running > 0 && curl_multi_select($mh, 1.0) === -1) {
            // select() could not wait on the sockets; back off briefly so the
            // loop does not spin.
            usleep(10000);
        }
    } while ( $running > 0 );

    // Report only the handles that were actually created, so skipped blank
    // entries are never dereferenced. Every handle gets a line, failed
    // transfers included (they carry HTTP code 0).
    foreach ( $curl_array as $ch ) {
        $info = curl_getinfo($ch);
        $info['url'] = trim($info['url']);
        if ($info['http_code'] == 200) {
            // curl_multi_getcontent($ch) returns the response body here if it is needed.
            echo "Status ok for : {$info['url']} Done (^)", PHP_EOL;
        } else {
            echo "Status {$info['http_code']}  for : {$info['url']} Error (x)", PHP_EOL;
        }

        curl_multi_remove_handle($mh, $ch);
        curl_close($ch);

        // Push PHP's output buffer first (only if one is active, since
        // ob_flush() raises a notice otherwise), then the system write buffer.
        if (ob_get_level() > 0) {
            ob_flush();
        }
        flush();
    }
    curl_multi_close($mh);
}
Baba
  • 94,024
  • 28
  • 166
  • 217