your curl code is trying to start 1.3 million curl handles SIMULTANEOUSLY, and will obviously run out of resources (but you don't catch that because you don't check the return value of curl_init(), if you had done something like if(!($curl_arr[$i] = curl_init($url))){throw new \RuntimeException("curl_init failed!");}
then you would have noticed it)
furthermore you're using a busy loop here
do {
curl_multi_exec($master,$running);
} while($running > 0);
meaning you'll be using 100% cpu while the handles are executing, for no goddamn reason, while you should have been waiting with curl_multi_select.
this is a job for curl_multi, but you're just using it wrong. my suggestion is to just slightly modify the code from Which performs faster, headless browser or Curl? ,
this will do 8 million requests, print the responses as they are getting completed, and never use more than 500 connections simultaneously, and use an async select() approach to not use any cpu while waiting for network IO,
curl_multi_fetch_and_print("http://165.227.152.138/botum2/index.php?i=",8000000,500,10000,true,true);
function curl_multi_fetch_and_print(string $base_url, int $count, int $max_connections, int $timeout_ms = 10000, bool $consider_http_300_redirect_as_error = true, bool $print_fault_reason): void
{
if ($max_connections < 1) {
throw new InvalidArgumentException("max_connections MUST be >=1");
}
if ($count < 1) {
throw new InvalidArgumentException("count MUST be >=1");
}
$mh = curl_multi_init();
$workers = array();
$work = function () use (&$workers, &$mh, &$print_fault_reason) {
// > If an added handle fails very quickly, it may never be counted as a running_handle
while (1) {
curl_multi_exec($mh, $still_running);
if ($still_running < count($workers)) {
break;
}
$cms = curl_multi_select($mh, 10);
//var_dump('sr: ' . $still_running . " c: " . count($workers)." cms: ".$cms);
}
while (false !== ($info = curl_multi_info_read($mh))) {
//echo "NOT FALSE!";
//var_dump($info);
{
if ($info['msg'] !== CURLMSG_DONE) {
continue;
}
if ($info['result'] !== CURLM_OK) {
if ($print_fault_reason) {
echo "request #" . ($workers[(int)$info['handle']]) . " error: " . print_r(array(false, $info['result'], "curl_exec error " . $info['result'] . ": " . curl_strerror($info['result'])), true) . PHP_EOL;
}
} elseif (CURLE_OK !== ($err = curl_errno($info['handle']))) {
if ($print_fault_reason) {
echo "request #" . ($workers[(int)$info['handle']]) . " error: " . print_r(array(false, $err, "curl error " . $err . ": " . curl_strerror($err)), true) . PHP_EOL;
}
} else {
$code = (string)curl_getinfo($info['handle'], CURLINFO_HTTP_CODE);
if ($code[0] === "3") {
if ($consider_http_300_redirect_as_error) {
if ($print_fault_reason) {
echo "request #" . ($workers[(int)$info['handle']]) . " error: " . print_r(array(false, -1, "got a http " . $code . " redirect, which is considered an error"), true) . PHP_EOL;
}
} else {
//if ($print_fault_reason) {
// echo "request #" . ($workers[(int)$info['handle']]) . " success: " . print_r(array(true, 0, "got a http " . $code . " redirect, which is considered a success"), true).PHP_EOL;
//} else {
// ... got a http redirect, which is not considered an errror,
echo "request #" . ($workers[(int)$info['handle']]) . " success: (http {$code} redirect)\n";
//}
}
} elseif ($code[0] === "2") {
if ($print_fault_reason) {
echo "request #" . ($workers[(int)$info['handle']]) . " success: http {$code}: " . curl_multi_getcontent($info['handle']) . PHP_EOL;
} else {
echo "request #" . ($workers[(int)$info['handle']]) . ": " . curl_multi_getcontent($info['handle']) . PHP_EOL;
}
} else {
// all non-2xx and non-3xx are always considered errors (500 internal server error, 400 client error, 404 not found, etcetc)
if ($print_fault_reason) {
echo "request #" . ($workers[(int)$info['handle']]) . " error: " . print_r(array(false, -1, "got a http " . $code . " code, which is considered an error"), true) . PHP_EOL;
}
}
}
curl_multi_remove_handle($mh, $info['handle']);
assert(isset($workers[(int)$info['handle']]));
unset($workers[(int)$info['handle']]);
curl_close($info['handle']);
}
}
//echo "NO MORE INFO!";
};
for ($i = 0; $i < $count; ++$i) {
$url = $base_url . $i;
while (count($workers) >= $max_connections) {
//echo "TOO MANY WORKERS!\n";
$work();
}
$neww = curl_init($url);
if (!$neww) {
trigger_error("curl_init() failed! probably means that max_connections is too high and you ran out of resources", E_USER_WARNING);
if ($print_fault_reason) {
echo "request #{$i} error: curl_init() failed!" . PHP_EOL;
}
continue;
}
$workers[(int)$neww] = $url;
curl_setopt_array($neww, array(
//CURLOPT_NOBODY => 1,
CURLOPT_RETURNTRANSFER=>1,
CURLOPT_SSL_VERIFYHOST => 0,
CURLOPT_SSL_VERIFYPEER => 0,
CURLOPT_TIMEOUT_MS => $timeout_ms
));
curl_multi_add_handle($mh, $neww);
//curl_multi_exec($mh, $unused_here); LIKELY TO BE MUCH SLOWER IF DONE IN THIS LOOP: TOO MANY SYSCALLS
}
while (count($workers) > 0) {
//echo "WAITING FOR WORKERS TO BECOME 0!";
//var_dump(count($workers));
$work();
}
curl_multi_close($mh);
return;
}