使用 curl-multi 的正确示例(最新的做法)是什么?我使用了下面的代码,但很多时候它无法获取内容(返回空结果,而且我也不清楚该如何获取正确的响应(response)或错误信息):
/**
 * Fetch several URLs concurrently with curl_multi and report a per-URL outcome.
 *
 * The transfer loop waits on socket activity with curl_multi_select() instead
 * of spinning, stops if curl_multi_exec() reports an error, and reads every
 * transfer's result code from curl_multi_info_read(), so a failed transfer is
 * reported as an error instead of silently yielding an empty response.
 *
 * @param array $urls URLs to request; the keys are preserved in the result.
 * @return array One entry per key of $urls with:
 *               'url'       => the requested URL,
 *               'error'     => false on success, otherwise an error message,
 *               'response'  => the response body on success, otherwise false,
 *               'http_code' => HTTP status of the last response (0 if none was received).
 */
public function multi_curl($urls)
{
    $AllResults = [];
    $handlesArray = [];
    // key => CURLE_* result code reported for the finished transfer
    $transferCodes = [];
    // message of a curl_multi level failure, '' while everything is fine
    $multiError = '';
    $curl_conn_timeout = 3 * 60;  // max 3 minutes
    $curl_max_timeout = 30 * 60;  // max 30 minutes

    $mch = curl_multi_init();
    foreach ($urls as $key => $url) {
        $ch = curl_init();
        if ($ch === false) {
            // remembered as null so the result loop can report the failure
            $handlesArray[$key] = null;
            continue;
        }
        // options are set one by one on purpose: an unsupported option
        // (e.g. TCP Fast Open) must not prevent the following ones from being applied
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
        curl_setopt($ch, CURLOPT_HEADER, false);
        // timeouts: https://thisinterestsme.com/php-setting-curl-timeout/ and https://stackoverflow.com/a/15982505/2377343
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $curl_conn_timeout);
        curl_setopt($ch, CURLOPT_TIMEOUT, $curl_max_timeout);
        if (defined('CURLOPT_TCP_FASTOPEN')) {
            curl_setopt($ch, CURLOPT_TCP_FASTOPEN, 1);
        }
        curl_setopt($ch, CURLOPT_ENCODING, ""); // empty to autodetect | gzip,deflate
        // NOTE(review): peer verification is disabled, which allows man-in-the-middle
        // attacks; kept for backward compatibility, enable it unless there is a reason not to.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_URL, $url);
        $handlesArray[$key] = $ch;
        curl_multi_add_handle($mch, $ch);
    }

    $runningHandlesAmount = 0;
    do {
        do {
            $execReturnValue = curl_multi_exec($mch, $runningHandlesAmount);
        } while ($execReturnValue === CURLM_CALL_MULTI_PERFORM);
        if ($execReturnValue !== CURLM_OK) {
            // a multi-level failure would otherwise keep this loop running forever
            $multiError = 'curl_multi_exec error: ' . curl_multi_strerror($execReturnValue);
            break;
        }
        // collect the result code of every transfer that has finished so far
        while (($info = curl_multi_info_read($mch)) !== false) {
            if ($info['msg'] !== CURLMSG_DONE) {
                continue;
            }
            $finishedKey = array_search($info['handle'], $handlesArray, true);
            if ($finishedKey !== false) {
                $transferCodes[$finishedKey] = $info['result'];
            }
        }
        // block until there is socket activity instead of burning CPU;
        // -1 means select() itself failed, so sleep briefly to avoid a busy loop
        if ($runningHandlesAmount > 0 && curl_multi_select($mch, 1.0) === -1) {
            usleep(1000);
        }
    } while ($runningHandlesAmount > 0);

    foreach ($urls as $key => $url) {
        $AllResults[$key]['url'] = $url;
        $handle = $handlesArray[$key];
        if ($handle === null) {
            $AllResults[$key]['error'] = 'curl_init() failed';
            $AllResults[$key]['response'] = false;
            $AllResults[$key]['http_code'] = 0;
            continue;
        }
        $code = $transferCodes[$key] ?? null;
        if ($code === CURLE_OK) {
            $AllResults[$key]['error'] = false;
            $AllResults[$key]['response'] = curl_multi_getcontent($handle);
        } else {
            $message = curl_error($handle);
            if ($message === '') {
                if ($code !== null) {
                    $message = (string) curl_strerror($code);
                } elseif ($multiError !== '') {
                    $message = $multiError;
                } else {
                    $message = 'transfer did not complete';
                }
            }
            $AllResults[$key]['error'] = $message;
            $AllResults[$key]['response'] = false;
        }
        // lets callers tell a 404/500 page apart from a successful response
        $AllResults[$key]['http_code'] = (int) curl_getinfo($handle, CURLINFO_HTTP_CODE);
        curl_multi_remove_handle($mch, $handle);
        curl_close($handle);
    }
    curl_multi_close($mch);
    return $AllResults;
}
并执行:
// Example call: fetch two Bacon Ipsum API URLs in parallel.
// The second URL needs a literal "&paras=2"; "&para" had been turned into
// the pilcrow sign by HTML-entity decoding, which broke the query string.
$urls = [
    'https://baconipsum.com/api/?type=meat-and-filler',
    'https://baconipsum.com/api/?type=all-meat&paras=2',
];
$results = $helpers->multi_curl($urls);
有什么东西,可以改变,有更好的结果吗?
更新:我发现这个代码仓库也提到缺少关于 multi-curl 最佳用法的文档,并给出了他们自己的做法。不过,我提出这个问题也是希望得到其他有见地的回答。
发布于 2021-05-27 10:49:22
我使用以下代码
该代码存在以下问题:
下面这个实现应该会快得多:它对最大并发连接数有可配置的限制,会重用 curl 句柄,尽快移除已完成的 worker,并检测 curl_multi 错误,等等。
/**
 * Fetch all URLs in parallel through a bounded pool of reusable curl handles.
 *
 * Warning: all URLs must be unique, because the result array is keyed by URL.
 *
 * @param array $urls_unique
 *            URLs to fetch
 * @param int $max_connections
 *            (optional, default 100) max simultaneous connections
 *            (some websites will auto-ban you for "ddosing" if you send too many requests simultaneously,
 *            and some wifi routers will get unstable on too many connections..)
 * @param array|null $additional_curlopts
 *            (optional) additional curl options; every curl handle gets these options,
 *            and they override the defaults set by this function
 * @throws \InvalidArgumentException if $max_connections is below 1 while there are URLs to fetch
 * @throws \RuntimeException on curl_multi errors, and when any single transfer fails
 * @throws \RuntimeException on curl_init() / curl_setopt() errors
 * @return array array(url=>response,url2=>response2,...)
 */
function curl_fetch_multi_2(array $urls_unique, int $max_connections = 100, ?array $additional_curlopts = null)
{
    if (count($urls_unique) === 0) {
        // nothing to fetch; no curl resources are needed at all
        return array();
    }
    if ($max_connections < 1) {
        // with no worker the dispatch loop below could never make progress
        throw new \InvalidArgumentException("max_connections must be at least 1, got " . $max_connections);
    }
    $max_connections = min($max_connections, count($urls_unique));
    $ret = array();
    // $workers format: [(int)$ch]=url, only handles currently attached to $mh
    $workers = array();
    $unemployed_workers = array();
    // $pool format: [(int)$ch]=$ch, every handle created here, used for cleanup
    $pool = array();
    $mh = curl_multi_init();
    try {
        for ($i = 0; $i < $max_connections; ++ $i) {
            $unemployed_worker = curl_init();
            if (! $unemployed_worker) {
                throw new \RuntimeException("failed creating unemployed worker #" . $i);
            }
            $pool[(int) $unemployed_worker] = $unemployed_worker;
            $unemployed_workers[] = $unemployed_worker;
        }
        unset($i, $unemployed_worker);
        // drives the transfers until at least one worker has finished, then
        // stores its response and returns the handle to the unemployed pool
        $work = function () use (&$workers, &$unemployed_workers, $mh, &$ret): void {
            assert(count($workers) > 0, "work() called with 0 workers!!");
            $still_running = null;
            for (;;) {
                do {
                    $err = curl_multi_exec($mh, $still_running);
                } while ($err === CURLM_CALL_MULTI_PERFORM);
                if ($err !== CURLM_OK) {
                    $errinfo = [
                        "multi_exec_return" => $err,
                        "curl_multi_errno" => curl_multi_errno($mh),
                        "curl_multi_strerror" => curl_multi_strerror($err)
                    ];
                    $errstr = "curl_multi_exec error: " . str_replace([
                        "\r",
                        "\n"
                    ], "", var_export($errinfo, true));
                    throw new \RuntimeException($errstr);
                }
                if ($still_running < count($workers)) {
                    // some workers have finished downloading, process them
                    break;
                }
                // no workers finished yet, sleep-wait for workers to finish downloading.
                if (curl_multi_select($mh, 1) === -1) {
                    // select() itself failed and returned immediately;
                    // sleep briefly so this loop does not spin at 100% cpu
                    usleep(1000);
                }
            }
            while (false !== ($info = curl_multi_info_read($mh))) {
                if ($info['msg'] !== CURLMSG_DONE) {
                    // not the message we're looking for, ignore it.
                    continue;
                }
                // 'result' is an easy-handle (CURLE_*) code, not a CURLM_* code
                if ($info['result'] !== CURLE_OK) {
                    $errinfo = [
                        "effective_url" => curl_getinfo($info['handle'], CURLINFO_EFFECTIVE_URL),
                        "curl_errno" => curl_errno($info['handle']),
                        "curl_error" => curl_error($info['handle']),
                        "curl_multi_errno" => curl_multi_errno($mh),
                        "curl_multi_strerror" => curl_multi_strerror(curl_multi_errno($mh))
                    ];
                    $errstr = "curl_multi worker error: " . str_replace([
                        "\r",
                        "\n"
                    ], "", var_export($errinfo, true));
                    throw new \RuntimeException($errstr);
                }
                $ch = $info['handle'];
                $ch_index = (int) $ch;
                $url = $workers[$ch_index];
                $ret[$url] = curl_multi_getcontent($ch);
                unset($workers[$ch_index]);
                curl_multi_remove_handle($mh, $ch);
                $unemployed_workers[] = $ch;
            }
        };
        // array_replace() keeps the integer CURLOPT_* keys intact (array_merge() would renumber them)
        $opts = array_replace(array(
            CURLOPT_URL => '',
            CURLOPT_RETURNTRANSFER => 1,
            CURLOPT_ENCODING => ''
        ), $additional_curlopts ?? array());
        foreach ($urls_unique as $url) {
            while (empty($unemployed_workers)) {
                $work();
            }
            $new_worker = array_pop($unemployed_workers);
            $opts[CURLOPT_URL] = $url;
            if (! curl_setopt_array($new_worker, $opts)) {
                $errstr = "curl_setopt_array failed: " . curl_errno($new_worker) . ": " . curl_error($new_worker) . " " . var_export($opts, true);
                throw new \RuntimeException($errstr);
            }
            $add_result = curl_multi_add_handle($mh, $new_worker);
            if ($add_result !== CURLM_OK) {
                throw new \RuntimeException("curl_multi_add_handle failed: " . $add_result . ": " . curl_multi_strerror($add_result));
            }
            $workers[(int) $new_worker] = $url;
        }
        while (count($workers) > 0) {
            $work();
        }
        return $ret;
    } finally {
        // runs on success and on every exception above, so no handle is leaked
        foreach ($workers as $ch_index => $unfinished_url) {
            curl_multi_remove_handle($mh, $pool[$ch_index]);
        }
        foreach ($pool as $ch) {
            curl_close($ch);
        }
        curl_multi_close($mh);
    }
}
发布于 2021-05-27 01:39:25
我强烈建议看看 Guzzle 库。
它允许您以面向对象的方式执行异步curl请求。
基本例子:
/**
 * Example: send a batch of asynchronous GET requests and collect the responses.
 *
 * Client, ResponseInterface and Utils are referenced by their short names; the
 * matching imports are not part of this snippet (presumably GuzzleHttp\Client,
 * Psr\Http\Message\ResponseInterface and GuzzleHttp\Promise\Utils — confirm).
 */
class ExampleASyncRequester
{
    /** HTTP client used to issue the asynchronous requests. */
    private Client $client;

    /** Responses received so far, appended in the order they arrive. */
    private array $responses = [];

    /** URLs that doRequests() will fetch. */
    private array $urlsToRequest;

    /** Promises created by doRequests(), one per requested URL. */
    private array $requestPromises = [];

    /**
     * @param array $urlsToRequest URLs to fetch when doRequests() is called
     */
    public function __construct(array $urlsToRequest)
    {
        $this->client = new Client();
        $this->urlsToRequest = $urlsToRequest;
    }

    /**
     * Start one asynchronous GET per URL and block until every request has
     * either succeeded or failed. Only successful responses are stored.
     */
    public function doRequests(): void
    {
        // Success handler shared by all requests: remember the response.
        $collect = function (ResponseInterface $received): void {
            $this->responses[] = $received;
        };

        foreach ($this->urlsToRequest as $url) {
            $pending = $this->client->getAsync($url);
            $pending->then($collect);
            $this->requestPromises[] = $pending;
        }

        // settle() waits for all promises, whether they fulfil or reject.
        $allSettled = Utils::settle($this->requestPromises);
        $allSettled->wait();
    }

    /**
     * @return array the responses collected by doRequests()
     */
    public function getResponses(): array
    {
        return $this->responses;
    }
}
// Request the same page five times to demonstrate concurrent fetching.
$requestInstance = new ExampleASyncRequester(
    array_fill(0, 5, 'https://www.google.com')
);
$requestInstance->doRequests();

// Walk over the collected responses and dump each body.
/** @var ResponseInterface $response */
foreach ($requestInstance->getResponses() as $response) {
    var_dump($response->getBody()->getContents());
}
https://stackoverflow.com/questions/58570054
复制相似问题