CURL
/**
 * Example of issuing a single HTTP request with cURL.
 *
 * Performs the request, prints the transfer details from curl_getinfo() with
 * var_dump(), and returns the raw response.
 *
 * @param array $query Request options:
 * [
 *     'url'      => 'www.baidu.com',  // target URL (required)
 *     'timeout'  => 30,               // timeout in seconds; defaults to 30 when absent
 *     'headers'  => [],               // request headers (see the header handling below)
 *     'postData' => '',               // POST payload: raw string or array of fields; leave unset for a non-POST request
 *     'proxy'    => '127.0.0.1:8888', // proxy address; skipped when empty
 * ]
 * @return string|false Response headers followed by the body (CURLOPT_HEADER is enabled),
 *                      or false when curl_exec() fails.
 */
function curlSetting($query)
{
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $query['url']);
    // Return the response from curl_exec() instead of printing it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    // Include the response headers in the returned string.
    curl_setopt($ch, CURLOPT_HEADER, true);

    // Use the caller's timeout value itself (not the boolean result of isset()),
    // falling back to 30 seconds when none is given.
    $timeout = isset($query['timeout']) ? $query['timeout'] : 30;
    curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);

    // Follow redirects.
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($ch, CURLOPT_BUFFERSIZE, 1024);
    // curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
    // curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);

    // Request headers.
    if (isset($query['headers'])) {
        // NOTE(review): isAssocArray() is defined outside this snippet; presumably it
        // reports whether the array is a name => value map - confirm.
        if (isAssocArray($query['headers'])) {
            // Turn each name => value pair into a "Name: value" header line.
            $headerLines = [];
            foreach ($query['headers'] as $name => $value) {
                $headerLines[] = "{$name}: {$value}";
            }
            curl_setopt($ch, CURLOPT_HTTPHEADER, $headerLines);
        } else {
            // Already in the form CURLOPT_HTTPHEADER expects; pass through as is.
            curl_setopt($ch, CURLOPT_HTTPHEADER, $query['headers']);
        }
    }

    // Switch to POST and attach the payload (isset() is already false for null).
    if (isset($query['postData'])) {
        curl_setopt($ch, CURLOPT_POST, true);
        if (is_string($query['postData'])) {
            curl_setopt($ch, CURLOPT_POSTFIELDS, $query['postData']);
        } elseif (is_array($query['postData'])) {
            $queryString = http_build_query($query['postData']);
            // http_build_query() emits hobbies[0]=swimming&hobbies[1]=football;
            // rewrite the numeric indexes to the hobbies[]=swimming&hobbies[]=football form.
            $queryString = preg_replace('/%5B\d+%5D/simU', '%5B%5D', $queryString);
            curl_setopt($ch, CURLOPT_POSTFIELDS, $queryString);
        }
    }

    // Proxy.
    if (!empty($query['proxy'])) {
        curl_setopt($ch, CURLOPT_PROXY, $query['proxy']);
    }

    $response = curl_exec($ch);
    $curlInfo = curl_getinfo($ch);
    var_dump($curlInfo);

    return $response;
}
curlMulti
// Fetches several URLs concurrently through one curl_multi handle.
class CurlMulti
{
    // Multi handle shared by all send() calls; created on first use by getMh().
    protected static $mh = null;

    /**
     * Runs every task concurrently and returns the responses of the transfers that succeeded.
     *
     * Each task is an array with these keys:
     *   'url'       target URL (required)
     *   'headers'   header lines passed to CURLOPT_HTTPHEADER (optional)
     *   'post_data' POST payload; any non-null value makes the request a POST (optional)
     *   'proxy'     proxy address (optional)
     *   'timeout'   timeout in seconds, 30 when absent (optional)
     *   'callback'  callable that receives the body; its return value is stored as 'body' (optional)
     *
     * @param array $tasks
     * @return array List of ['code' => ..., 'header' => ..., 'body' => ..., 'curlInfo' => ...]
     *               entries in completion order. Transfers that end with a cURL error are skipped.
     */
    public static function send(array $tasks)
    {
        $mh = self::getMh();
        $pending = [];

        // Create one easy handle per task and attach it to the multi handle.
        foreach ($tasks as $key => $task) {
            $ch = self::taskSetting(
                $task['url'],
                isset($task['headers']) ? $task['headers'] : [],
                isset($task['post_data']) ? $task['post_data'] : null,
                isset($task['proxy']) ? $task['proxy'] : '',
                isset($task['timeout']) ? $task['timeout'] : 30
            );
            // Tag the handle with its task key so a finished transfer can be matched to
            // its task. Matching on the URL picks the wrong task when a redirect changes
            // the effective URL or when two tasks share a URL.
            curl_setopt($ch, CURLOPT_PRIVATE, (string) $key);
            curl_multi_add_handle($mh, $ch);
            $pending[$key] = $ch;
        }

        $result = [];
        do {
            $status = curl_multi_exec($mh, $running);

            // Drain the whole message queue: several transfers can finish during one
            // curl_multi_exec() call, including the call that brings $running to 0.
            while (($info = curl_multi_info_read($mh)) !== false) {
                $ch = $info['handle'];
                $key = curl_getinfo($ch, CURLINFO_PRIVATE);

                if ($info['result'] === CURLE_OK) {
                    $result[] = self::buildResult($ch, $tasks[$key]);
                }

                // Detach from the multi handle before closing the easy handle.
                curl_multi_remove_handle($mh, $ch);
                curl_close($ch);
                unset($pending[$key]);
            }

            // Wait for socket activity instead of spinning; if select fails,
            // back off briefly so the loop does not burn CPU.
            if ($running > 0 && $status === CURLM_OK && curl_multi_select($mh, 1.0) === -1) {
                usleep(1000);
            }
        } while ($running > 0 && ($status === CURLM_OK || $status === CURLM_CALL_MULTI_PERFORM));

        // If the loop stopped on a multi-handle error, release whatever is still attached
        // so the shared handle is clean for the next send() call.
        foreach ($pending as $ch) {
            curl_multi_remove_handle($mh, $ch);
            curl_close($ch);
        }

        return $result;
    }

    /**
     * Builds the result entry for one successfully finished transfer.
     *
     * @param mixed $ch   Finished easy handle, still open.
     * @param array $task The task this handle was created from.
     * @return array ['code' => HTTP status, 'header' => raw headers, 'body' => body or callback result, 'curlInfo' => curl_getinfo() array]
     */
    protected static function buildResult($ch, array $task)
    {
        $curlInfo = curl_getinfo($ch);
        $response = (string) curl_multi_getcontent($ch);

        // Split on the header size reported by cURL. Deriving the split from
        // size_download fails for empty bodies: substr($response, -0) is the whole response.
        $headerSize = (int) $curlInfo['header_size'];
        $header = (string) substr($response, 0, $headerSize);
        $body = (string) substr($response, $headerSize);

        // Run the optional callback exactly once on the raw body.
        if (!empty($task['callback'])) {
            $body = call_user_func($task['callback'], $body);
        }

        return [
            'code' => $curlInfo['http_code'],
            'header' => $header,
            'body' => $body,
            'curlInfo' => $curlInfo,
        ];
    }

    /**
     * Creates and configures the easy handle for one task.
     *
     * @param string     $url      Target URL.
     * @param array      $headers  Header lines for CURLOPT_HTTPHEADER; skipped when empty.
     * @param mixed|null $postData POST payload; null leaves CURLOPT_POST unset.
     * @param string     $proxy    Proxy address; skipped when empty.
     * @param int        $timeout  Value used for both the transfer and the connect timeout, in seconds.
     * @return mixed The configured cURL easy handle.
     */
    protected static function taskSetting($url, $headers = [], $postData = null, $proxy = '', $timeout = 30)
    {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        // Capture the response (headers included) instead of printing it.
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_HEADER, true);
        curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
        // Follow redirects.
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);

        if (!empty($headers)) {
            curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
        }
        if (!is_null($postData)) {
            curl_setopt($ch, CURLOPT_POST, true);
            curl_setopt($ch, CURLOPT_POSTFIELDS, $postData);
        }
        if (!empty($proxy)) {
            curl_setopt($ch, CURLOPT_PROXY, $proxy);
        }

        return $ch;
    }

    /**
     * Returns the shared multi handle, creating it on first use.
     *
     * @return mixed The curl_multi handle stored in self::$mh.
     */
    protected static function getMh()
    {
        if (is_null(self::$mh)) {
            self::$mh = curl_multi_init();
        }
        return self::$mh;
    }
}