-3

I have written a PHP bot, and now I want to send it 8 million requests. This is what I have tried so far:

I tried the curl_multi_* functions, but ran into problems.

I also tried the Linux `parallel` tool, but ran into problems there too.

Do you have any suggestions?

Environment: PHP 7.1.1, Linux Ubuntu 16.0.1. The `parallel` attempt:

# Request one bot URL and print the request index followed by the HTTP status code.
# $1 is an offset that is added to the fixed base index below.
myfnc(){
    i=1264609
 # Print the index that is about to be requested.
 echo "$(($1+i))";
 # Discard the response body (--output /dev/null) and capture only the status code
 # that --write-out prints.
 response=$(curl --write-out %{http_code} --silent --output /dev/null http://localhost/botum/index.php?i=$(($1+i)))
 echo $response
}
# Export the function so that the shells started by parallel can call it.
export -f myfnc
# Feed the numbers 1..100 to parallel, one myfnc invocation per number.
# NOTE(review): -j0 presumably means "no fixed job limit" (GNU parallel) - confirm for the installed tool.
seq 100 | parallel -j0 myfnc

The curl_multi code that has problems:

// Fetch a fixed range of bot URLs with curl_multi and dump all response bodies at the end.
// Both calls below are meant to lift PHP's execution time limit for this long-running script.
set_time_limit(0);
ini_set("max_execution_time",-1);
$nodes = array();

// Build the full URL list up front: i = 1366295 .. 1396295 (30,001 URLs).
for($i =1366295;$i<1396296;$i++){
    array_push($nodes,"http://165.227.152.138/botum2/index.php?i=$i");
}

$node_count = count($nodes);

$curl_arr = array();
$master = curl_multi_init();

// Create one curl handle per URL and attach every one of them to the multi handle
// before any transfer is driven, so all handles exist at the same time.
for($i = 0; $i < $node_count; $i++)
{
    $url =$nodes[$i];
 // Start of the part the author marked as problematic.
 // The return value of curl_init() is not checked here.
    $curl_arr[$i] = curl_init($url);
    curl_setopt($curl_arr[$i], CURLOPT_RETURNTRANSFER, true);
// End of the part the author marked as problematic.
    curl_multi_add_handle($master, $curl_arr[$i]);

}

// Drive the transfers until none is running. There is no wait between iterations,
// so curl_multi_exec() is called again immediately each time.
do {
    curl_multi_exec($master,$running);
} while($running > 0);


// Collect every response body ($results is created implicitly by the first append).
// The handles are not removed from $master or closed afterwards.
for($i = 0; $i < $node_count; $i++)
{
    $results[] = curl_multi_getcontent  ( $curl_arr[$i]  );
}
print_r($results);
  • Your code looks weird, can you edit and post it again? – Fabio William Conceição Jun 02 '19 at 12:31
  • I edited it. This code is just me trying the requests from the terminal. 100 requests are no problem, but 100000 are a big problem. – Halil Arıcı Jun 02 '19 at 12:39
  • *"I use multi_curl\* but problems..."* - You need to explain your problem. Also see [How to create a Minimal, Complete, and Verifiable example](http://stackoverflow.com/help/mcve). – jww Jun 02 '19 at 12:40
  • 1
    your curl code is trying to start 1.3 million curl handles **SIMULTANEOUSLY**, and will obviously run out of resources (but you don't catch that because you don't check the return value of curl_init(), if you had done something like `if(!($curl_arr[$i] = curl_init($url))){throw new \RuntimeException("curl_init failed!");}` you would have noticed it) – hanshenrik Jun 02 '19 at 13:08

1 Answer

1

Your curl code is trying to start 1.3 million curl handles SIMULTANEOUSLY, and will obviously run out of resources. You don't catch that because you don't check the return value of curl_init(); if you had done something like `if(!($curl_arr[$i] = curl_init($url))){throw new \RuntimeException("curl_init failed!");}` then you would have noticed it.

Furthermore, you're using a busy loop here:

// Busy loop: curl_multi_exec() is called again immediately, with no wait between iterations.
do {
    curl_multi_exec($master,$running);
} while($running > 0);

This means you'll be using 100% CPU while the handles are executing, for no good reason, when you should have been waiting with curl_multi_select().

This is a job for curl_multi, but you're just using it wrong. My suggestion is to slightly modify the code from the answer to "Which performs faster, headless browser or Curl?":

The code below will do 8 million requests, print the responses as they complete, never use more than 500 connections simultaneously, and use an async select() approach so that it does not use any CPU while waiting for network I/O:

// Arguments, in order: base URL, request count (8,000,000), max simultaneous connections (500),
// per-request timeout in ms (10000), treat HTTP 3xx as an error (true), print fault reasons (true).
curl_multi_fetch_and_print("http://165.227.152.138/botum2/index.php?i=",8000000,500,10000,true,true);
/**
 * Request "$base_url . $i" for every $i in 0 .. $count-1 through curl_multi and
 * echo the outcome of each request as soon as it completes.
 *
 * At most $max_connections transfers are attached to the multi handle at any
 * time; the function waits in curl_multi_select() while the pool is full.
 *
 * @param string $base_url        URL prefix; the request index is appended to it.
 * @param int    $count           Number of requests to make (indices 0 .. $count-1).
 * @param int    $max_connections Upper bound on simultaneously active transfers.
 * @param int    $timeout_ms      Per-request timeout (CURLOPT_TIMEOUT_MS).
 * @param bool   $consider_http_300_redirect_as_error
 *                                When true, a 3xx response is reported as an error;
 *                                when false, it is reported as a success.
 * @param bool   $print_fault_reason
 *                                When true, failed requests are echoed with a reason and
 *                                2xx responses are echoed with their status code; when
 *                                false, failures are silent.
 *
 * @throws InvalidArgumentException If $max_connections or $count is smaller than 1.
 */
function curl_multi_fetch_and_print(string $base_url, int $count, int $max_connections, int $timeout_ms = 10000, bool $consider_http_300_redirect_as_error = true, bool $print_fault_reason = true): void
{
    if ($max_connections < 1) {
        throw new InvalidArgumentException("max_connections MUST be >=1");
    }
    if ($count < 1) {
        throw new InvalidArgumentException("count MUST be >=1");
    }
    $mh = curl_multi_init();
    // Maps a per-handle key to the URL that handle is fetching.
    $workers = array();
    // curl handles are resources on PHP 7 but objects on PHP 8+, where an (int)
    // cast no longer yields a unique id; pick the key function that fits.
    $handle_id = function ($handle): int {
        return is_object($handle) ? spl_object_id($handle) : (int)$handle;
    };
    // Drives the transfers until at least one finishes, then reports and
    // releases every finished handle. Every flag read inside the closure must
    // be listed in use(): closures do not inherit the parent scope.
    $work = function () use (&$workers, $mh, $handle_id, $consider_http_300_redirect_as_error, $print_fault_reason) {
        // > If an added handle fails very quickly, it may never be counted as a running_handle
        while (1) {
            curl_multi_exec($mh, $still_running);
            if ($still_running < count($workers)) {
                break;
            }
            if (curl_multi_select($mh, 10) === -1) {
                // select could not wait on anything; sleep briefly so this loop
                // does not degenerate into a busy loop.
                usleep(1000);
            }
        }
        while (false !== ($info = curl_multi_info_read($mh))) {
            if ($info['msg'] !== CURLMSG_DONE) {
                continue;
            }
            $handle = $info['handle'];
            $id = $handle_id($handle);
            assert(isset($workers[$id]));
            $label = "request #" . $workers[$id];
            if ($info['result'] !== CURLE_OK) {
                // The transfer itself failed (timeout, connection refused, ...).
                if ($print_fault_reason) {
                    echo $label . " error: " . print_r(array(false, $info['result'], "curl_exec error " . $info['result'] . ": " . curl_strerror($info['result'])), true) . PHP_EOL;
                }
            } elseif (CURLE_OK !== ($err = curl_errno($handle))) {
                if ($print_fault_reason) {
                    echo $label . " error: " . print_r(array(false, $err, "curl error " . $err . ": " . curl_strerror($err)), true) . PHP_EOL;
                }
            } else {
                $code = (string)curl_getinfo($handle, CURLINFO_HTTP_CODE);
                if ($code[0] === "3") {
                    if ($consider_http_300_redirect_as_error) {
                        if ($print_fault_reason) {
                            echo $label . " error: " . print_r(array(false, -1, "got a http " . $code . " redirect, which is considered an error"), true) . PHP_EOL;
                        }
                    } else {
                        // ... got a http redirect, which is not considered an error
                        echo $label . " success: (http {$code} redirect)\n";
                    }
                } elseif ($code[0] === "2") {
                    if ($print_fault_reason) {
                        echo $label . " success: http {$code}: " . curl_multi_getcontent($handle) . PHP_EOL;
                    } else {
                        echo $label . ": " . curl_multi_getcontent($handle) . PHP_EOL;
                    }
                } else {
                    // all non-2xx and non-3xx are always considered errors (500 internal server error, 400 client error, 404 not found, etcetc)
                    if ($print_fault_reason) {
                        echo $label . " error: " . print_r(array(false, -1, "got a http " . $code . " code, which is considered an error"), true) . PHP_EOL;
                    }
                }
            }
            curl_multi_remove_handle($mh, $handle);
            unset($workers[$id]);
            if (is_resource($handle)) {
                // Only PHP 7 resources need an explicit close; PHP 8+ handle
                // objects are released once they are no longer referenced.
                curl_close($handle);
            }
        }
    };
    for ($i = 0; $i < $count; ++$i) {
        $url = $base_url . $i;
        // Pool is full: wait until at least one transfer has finished.
        while (count($workers) >= $max_connections) {
            $work();
        }
        $neww = curl_init($url);
        if ($neww === false) {
            trigger_error("curl_init() failed! probably means that max_connections is too high and you ran out of resources", E_USER_WARNING);
            if ($print_fault_reason) {
                echo "request #{$i} error: curl_init() failed!" . PHP_EOL;
            }
            continue;
        }
        $workers[$handle_id($neww)] = $url;
        curl_setopt_array($neww, array(
            //CURLOPT_NOBODY => 1,
            CURLOPT_RETURNTRANSFER => 1,
            // NOTE(review): TLS verification is disabled; harmless for plain http
            // URLs, but re-enable it before pointing this at an https endpoint.
            CURLOPT_SSL_VERIFYHOST => 0,
            CURLOPT_SSL_VERIFYPEER => 0,
            CURLOPT_TIMEOUT_MS => $timeout_ms
        ));
        curl_multi_add_handle($mh, $neww);
        //curl_multi_exec($mh, $unused_here); LIKELY TO BE MUCH SLOWER IF DONE IN THIS LOOP: TOO MANY SYSCALLS
    }
    // Drain the transfers that are still in flight.
    while (count($workers) > 0) {
        $work();
    }
    if (is_resource($mh)) {
        curl_multi_close($mh);
    }
    return;
}
hanshenrik
  • 19,904
  • 4
  • 43
  • 89
  • @HalilArıcı btw even 500 connections may be excessive, maybe 100 or even 50 works better, benchmark it if it's worth the hassle – hanshenrik Jun 02 '19 at 18:11