采集医院挂号导诊数据的源代码,php cli方式执行

来源:互联网 发布:sms水动力学软件下载 编辑:程序博客网 时间:2024/04/27 17:47

采集医院挂号导诊数据的源代码。请将文件保存为 UTF-8 编码,并以 PHP CLI 方式执行。
仅供学习参考用

<?php
/**
 * Crawler for the symptom-guide pages of app.wy.guahao.com (PHP CLI script).
 *
 * Phase 1 lists, for every sex/body-part pair, the symptom links found on the
 * "symptoms" page. Phase 2 opens each symptom link, extracts the first
 * question (content + uuid) and walks the yes/no decision tree through the
 * "chooseDiagnose" endpoint until every branch reports a final diagnosis.
 * The resulting tree is written to symptoms.json and symptoms.print.txt.
 *
 * Progress lines are converted from UTF-8 to GB2312 before being printed, so
 * this file itself must be saved as UTF-8.
 */

// A single cURL handle is shared by all requests; the helper functions reach
// it through `global $curl`.
$curl = curl_init();

/**
 * Print one progress line followed by CRLF, converted from UTF-8 to GB2312.
 *
 * If the text cannot be represented in GB2312, iconv() returns false; in that
 * case the unconverted UTF-8 text is printed instead of an empty line.
 *
 * @param string $text UTF-8 text to print.
 * @return void
 */
function guide_print($text)
{
    $converted = iconv('utf-8', 'gb2312', $text);
    echo ($converted === false ? $text : $converted) . "\r\n";
}

/**
 * Fetch a URL with an HTTP GET request on the shared cURL handle.
 *
 * @param string $url Address to fetch.
 * @return string|false Response body, or false when the transfer failed.
 */
function curl_get_contents($url)
{
    global $curl;
    curl_setopt_array($curl, array(
        CURLOPT_URL => $url,
        // The handle is shared with curl_post(), which switches it to POST.
        // Without this reset every GET issued after the first POST would be
        // sent as a POST carrying the previous form fields.
        CURLOPT_HTTPGET => true,
        CURLOPT_HTTPHEADER => array('Connection: Keep-Alive', 'Keep-Alive: 300'),
        CURLOPT_ENCODING => 'gzip',
        CURLOPT_RETURNTRANSFER => true,
    ));
    return curl_exec($curl);
}

/**
 * Send an HTTP POST request on the shared cURL handle.
 *
 * @param string       $url  Address to post to.
 * @param string|array $post Request body, passed to CURLOPT_POSTFIELDS.
 * @return string|false Response body, or false when the transfer failed.
 */
function curl_post($url, $post)
{
    global $curl;
    curl_setopt_array($curl, array(
        CURLOPT_URL => $url,
        CURLOPT_HTTPHEADER => array('Connection: Keep-Alive', 'Keep-Alive: 300'),
        CURLOPT_ENCODING => 'gzip',
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => $post,
        CURLOPT_RETURNTRANSFER => true,
    ));
    return curl_exec($curl);
}

/**
 * Answer one question of the guide and return the subtree below that answer.
 *
 * Posts the answer to the chooseDiagnose endpoint. If the reply is not final
 * (isEnd == 0) the next question is expanded recursively via diagnose_each();
 * otherwise a leaf with the diagnosis text and the department names is built.
 *
 * @param string $uuid Identifier of the question being answered.
 * @param int    $isok 1 for "yes", 0 for "no".
 * @return array Either array('diagnose'=>..., 'yes'=>..., 'no'=>...) for an
 *               inner node, array('diagnose'=>..., 'deptlist'=>...) for a
 *               leaf, or an empty array when the request or reply was unusable.
 */
function choosediagnose($uuid, $isok)
{
    $response = curl_post('http://app.wy.guahao.com/guide/chooseDiagnose', http_build_query(array(
        'isSelect' => $isok,
        'diagnoseUuid' => $uuid,
    )));
    $data = is_string($response) ? json_decode($response, true) : null;

    // A failed transfer or malformed reply must stop here: with $data === null
    // the loose comparison `null == 0` is true, which would recurse forever.
    if (!is_array($data)
        || !isset($data['data']['diagnoseItem'])
        || !is_array($data['data']['diagnoseItem'])
    ) {
        fwrite(STDERR, "chooseDiagnose: unusable response for uuid '{$uuid}' (isSelect={$isok})\n");
        return array();
    }

    $item = $data['data']['diagnoseItem'];
    $content = isset($item['diagnoseContent']) ? $item['diagnoseContent'] : null;
    $ended = isset($item['isEnd']) && $item['isEnd'] != 0;

    if (!$ended) {
        // Not finished yet: expand the follow-up question, but only if the
        // reply actually names one (otherwise there is nothing to recurse on).
        if (!isset($item['diagnoseUuid']) || $item['diagnoseUuid'] === '') {
            fwrite(STDERR, "chooseDiagnose: reply for uuid '{$uuid}' has no follow-up diagnoseUuid\n");
            return array();
        }
        diagnose_each($content, $item['diagnoseUuid'], $data);
        return $data;
    }

    // Final diagnosis: collect the names of the suggested departments.
    $deptlist = array();
    if (isset($item['deptInfoItemList']) && is_array($item['deptInfoItemList'])) {
        foreach ($item['deptInfoItemList'] as $dept) {
            $deptlist[] = $dept['deptName'];
        }
    }
    $data = array(
        'diagnose' => $content,
        'deptlist' => $deptlist,
    );
    guide_print('诊断:' . $data['diagnose']);
    return $data;
}

/**
 * Walk both branches ("yes" first, then "no") below one question.
 *
 * @param string $content Text of the question.
 * @param string $uuid    Identifier of the question.
 * @param mixed  $array   Output: replaced by
 *                        array('diagnose'=>$content, 'yes'=>..., 'no'=>...).
 * @return void
 */
function diagnose_each($content, $uuid, &$array)
{
    // Plain echo-based printing: the question text comes from the remote
    // server and must not be interpreted as a printf() format string.
    guide_print('是:' . $content);
    $yes = choosediagnose($uuid, 1);

    guide_print('否:' . $content);
    $no = choosediagnose($uuid, 0);

    $array = array(
        'diagnose' => $content,
        'yes' => $yes,
        'no' => $no,
    );
}

$symptoms_array = array();
$sex_array = array(
    1 => '男',
    2 => '女',
);
$body_array = array(
    1 => '头部',
    2 => '颈部',
    3 => '胸部',
    4 => '腹部',
    5 => '四肢',
    6 => '皮肤',
    7 => '生殖器',
    8 => '排泄部',
    9 => '背部',
    10 => '耳眼口鼻',
    11 => '上肢',
    12 => '下肢',
    13 => '其他',
);

// Phase 1: collect symptom name => link for every sex / body-part pair.
foreach ($sex_array as $sex => $sex_text) {
    foreach ($body_array as $part => $part_text) {
        $html = curl_get_contents("http://app.wy.guahao.com/guide/symptoms?sex={$sex}&partId={$part}");
        if (!is_string($html)) {
            fwrite(STDERR, "symptoms: request failed for sex={$sex} partId={$part}\n");
            continue;
        }
        preg_match_all('/<a.*?href\="(.*?)".*?>(.*?)<\/a>/is', $html, $matches);
        foreach ($matches[0] as $k => $v) {
            $symptoms_array[$sex_text][$part_text][$matches[2][$k]] = $matches[1][$k];
            guide_print('获取病情:' . $sex_text . ':' . $part_text . ':' . $matches[2][$k]);
        }
    }
}

// Phase 2: replace every link by the decision tree reachable from it.
$datas = $symptoms_array;
foreach ($symptoms_array as $sex => $bodys) {
    foreach ($bodys as $body => $symptoms) {
        foreach ($symptoms as $symptom => $link) {
            guide_print('[===>' . $sex . ':' . $body . ':' . $symptom . '<===]');
            $html = curl_get_contents($link);

            // The first question is embedded in the page's script as
            // `diagnose.diagnoseContent || '...'` / `diagnose.diagnoseUuid || '...'`.
            $found = is_string($html)
                && preg_match('/diagnose\.diagnoseContent\s\|\|\s\'(.*?)\'/is', $html, $content_match)
                && preg_match('/diagnose\.diagnoseUuid\s\|\|\s\'(.*?)\'/is', $html, $uuid_match);
            if (!$found) {
                // Leave the link in place so the entry can be retried later.
                fwrite(STDERR, "symptom page: no first question found at {$link}\n");
                continue;
            }

            diagnose_each($content_match[1], $uuid_match[1], $datas[$sex][$body][$symptom]);
        }
    }
}

$json = json_encode($datas);
if ($json === false || file_put_contents('symptoms.json', $json) === false) {
    fwrite(STDERR, "could not write symptoms.json\n");
}
if (file_put_contents('symptoms.print.txt', print_r($datas, true)) === false) {
    fwrite(STDERR, "could not write symptoms.print.txt\n");
}
?>
0 0
原创粉丝点击