<?php

/**
 * Scrapes campus crime statistics for a school from the
 * ope.ed.gov "campussafety" JSON API.
 *
 * Usage: call spiderData() with a school name; it returns an envelope array
 * with 'code', 'msg' and (on success) 'data'.
 *
 * Class SchoolSpider
 */
class SchoolSpider
{
    // Name of the school currently being scraped.
    private $schoolName;
    // Endpoint used to check that the school name is known (name is appended as the filter).
    private $schoolApi = 'https://ope.ed.gov/campussafety/api/institution/names?filter=';
    // Endpoint used to search institutions and list their campuses (POST, JSON body).
    private $schoolSerachApi = 'https://ope.ed.gov/campussafety/api/institution/search';
    // Endpoint for the data of a single campus (UnitID is appended).
    private $dataApi = 'https://ope.ed.gov/campussafety/api/campus/';
    // Endpoint for the data of the whole institution (UnitID is appended).
    private $totalApi = 'https://ope.ed.gov/campussafety/api/institution/';
    // The 'Results' list returned by the last successful institution search.
    private $campisesData;
    // The 'Rows' list returned by the last successful detail request.
    private $dataDetail;
    // Message of the most recent failure, or null when nothing has failed.
    public $error;

    // Accumulated output: 'school_name' plus one entry per UnitID.
    public $result = [];

    public function __construct()
    {
        // Scraping may take a long time; remove the script execution time limit.
        set_time_limit(0);
    }

    /**
     * Scrape the institution-level and per-campus data for one school.
     *
     * State left over from a previous call on the same instance is discarded
     * first, so one instance can safely be reused for several schools.
     *
     * @param string $schoolName School name to look up.
     * @return array ['code' => -1, 'msg' => string] on failure, or
     *               ['code' => 0, 'msg' => string|null, 'data' => array] on success.
     *               On success 'msg' holds the last non-fatal detail error, if any.
     */
    public function spiderData($schoolName)
    {
        // Reset per-run state so results of an earlier school never leak in.
        $this->schoolName = $schoolName;
        $this->error = null;
        $this->campisesData = null;
        $this->dataDetail = null;
        $this->result = ['school_name' => $this->schoolName];

        if (!$this->checkName() || !$this->getCampises()) {
            return ['code' => -1, 'msg' => $this->error];
        }

        // Institution-level data for every search result.
        $this->formatData($this->campisesData);
        // Per-campus data for the campuses of the first search result.
        $this->formatData($this->campisesData[0]['Campuses'], true);

        return ['code' => 0, 'msg' => $this->error, 'data' => $this->result];
    }

    /**
     * Fetch the detail rows of every entry and store them in $this->result,
     * keyed by UnitID.
     *
     * Entries without a UnitID, entries whose UnitID is already stored and
     * entries whose detail request fails are skipped.
     *
     * @param array $campisesData List of entries carrying 'UnitID' and 'Name'.
     * @param bool  $isCampise    True to use the campus endpoint, false for the institution endpoint.
     * @return void
     */
    private function formatData($campisesData, $isCampise = false)
    {
        foreach ($campisesData as $campise) {
            if (!isset($campise['UnitID'])) {
                continue;
            }
            $unitId = $campise['UnitID'];

            // Check for duplicates before hitting the network, not after.
            if (isset($this->result[$unitId])) {
                continue;
            }

            $fetched = $isCampise
                ? $this->getDetailCampuse($unitId)
                : $this->getDetail($unitId);
            if (!$fetched) {
                continue;
            }

            $rows = [];
            foreach ($this->dataDetail as $index => $row) {
                // The first row is skipped (presumably the table header).
                if ($index === 0) {
                    continue;
                }
                $hasCells = !empty($row) && isset($row['Cells']) && is_array($row['Cells']);
                $rows[] = $hasCells ? array_column($row['Cells'], 'Html') : [];
            }

            $this->result[$unitId] = [
                'campuse' => isset($campise['Name']) ? $campise['Name'] : null,
                'data' => $rows,
            ];
        }
    }

    /**
     * Check that the name endpoint returns at least one entry with a
     * non-empty 'Code' for the school name.
     *
     * @return bool True when the name is accepted; false (with $this->error set) otherwise.
     */
    private function checkName()
    {
        $response = $this->httpRequest($this->schoolApi . urlencode($this->schoolName));
        $school = json_decode($response, true);

        if (empty($school[0]['Code'])) {
            $this->error = '检查学校名 error';
            return false;
        }
        return true;
    }

    /**
     * Search for the school and remember the result list in $this->campisesData.
     *
     * @return bool True when the first result has at least one campus;
     *              false (with $this->error set) otherwise.
     */
    private function getCampises()
    {
        $payload = $this->buildSearchPayload($this->schoolName);
        if ($payload === false) {
            // The name could not be JSON-encoded (e.g. invalid UTF-8).
            $this->error = '获取校区 error';
            return false;
        }

        $campisesData = json_decode($this->httpRequest($this->schoolSerachApi, $payload), true);

        if (empty($campisesData['Results'][0]['Campuses'])) {
            $this->error = '获取校区 error';
            return false;
        }
        $this->campisesData = $campisesData['Results'];
        return true;
    }

    /**
     * Build the JSON body of the institution search request.
     *
     * json_encode is used instead of string concatenation so that quotes and
     * backslashes in the name cannot break or alter the JSON document.
     *
     * @param string $schoolName School name to search for.
     * @return string|false JSON document, or false when encoding fails.
     */
    private function buildSearchPayload($schoolName)
    {
        return json_encode([
            'name' => (string) $schoolName,
            'city' => '',
            'state' => [],
            'country' => [],
            'countryNames' => [],
            'institutionType' => [],
            'institutionProgram' => [],
            'campusLocation' => '-1',
            'onlyResidentialCampuses' => false,
            'enrollmentRange' => [],
            'sort' => 'name',
            'sortDirection' => 'asc',
            'all' => false,
            'pageNumber' => 0,
            'fromFavorites' => false,
        ], JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
    }

    /**
     * Fetch the institution-level detail rows into $this->dataDetail.
     *
     * @param int|string $unitId UnitID of the institution.
     * @return bool True on success; false (with $this->error set) otherwise.
     */
    private function getDetail($unitId)
    {
        return $this->fetchRows($this->totalApi . $unitId);
    }

    /**
     * Fetch the detail rows of a single campus into $this->dataDetail.
     *
     * @param int|string $unitId UnitID of the campus.
     * @return bool True on success; false (with $this->error set) otherwise.
     */
    private function getDetailCampuse($unitId)
    {
        return $this->fetchRows($this->dataApi . $unitId);
    }

    /**
     * Request a detail URL and store Groups[0].Screens[0].Rows in $this->dataDetail.
     *
     * @param string $url Full detail URL.
     * @return bool True when a non-empty row list was found; false (with $this->error set) otherwise.
     */
    private function fetchRows($url)
    {
        $detail = json_decode($this->httpRequest($url), true);

        if (
            empty($detail['Groups'][0]['Screens'][0]['Rows'])
            || !is_array($detail['Groups'][0]['Screens'][0]['Rows'])
        ) {
            $this->error = '获取详情 error';
            return false;
        }
        $this->dataDetail = $detail['Groups'][0]['Screens'][0]['Rows'];
        return true;
    }

    /**
     * Perform an HTTP request and return the response body with tags stripped.
     *
     * A non-empty $data is sent as the body of a POST; otherwise a plain GET
     * is issued. Transport failures are written to the PHP error log (not to
     * the output stream) and reported to the caller as an empty JSON array.
     *
     * @param string $url  Target URL.
     * @param string $data JSON request body; empty for GET.
     * @return string Response body after strip_tags(), or '[]' on transport failure.
     */
    private function httpRequest($url, $data = '')
    {
        $curl = curl_init();
        if ($curl === false) {
            error_log('cURL Error #: unable to initialise a cURL handle');
            return json_encode([]);
        }

        $options = [
            CURLOPT_URL => $url,
            CURLOPT_RETURNTRANSFER => true,
            // Empty string lets cURL advertise every encoding it supports.
            CURLOPT_ENCODING => "",
            CURLOPT_MAXREDIRS => 10,
            CURLOPT_TIMEOUT => 30,
            CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
            CURLOPT_CUSTOMREQUEST => $data ? 'POST' : 'GET',
            CURLOPT_HTTPHEADER => [
                "Content-Type: application/json;charset=UTF-8",
                // NOTE(review): looks like a leftover from a Postman export — confirm it can be dropped.
                "Postman-Token: a30f51b0-cc09-4bfb-801e-830a931994f4",
                "cache-control: no-cache",
            ],
        ];
        if ($data) {
            // Only attach a body when there is one; a GET must not carry POST fields.
            $options[CURLOPT_POSTFIELDS] = $data;
        }
        curl_setopt_array($curl, $options);

        $response = curl_exec($curl);
        $err = curl_error($curl);

        curl_close($curl);

        if ($err || $response === false) {
            // Log instead of echoing so the caller's own output stays valid JSON.
            error_log("cURL Error #:" . $err);
            return json_encode([]);
        }

        return strip_tags($response);
    }
}

// Demo driver: scrape one school and print the returned envelope as JSON.
$spider = new SchoolSpider();
$envelope = $spider->spiderData('Harvard University');
echo json_encode($envelope);

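// Blog-page residue, kept as comments so the file still parses:
// 标签: none (Tags: none)
//
// 添加新评论 (Add new comment)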