验证码识别之中国移动话费查询

来源:互联网 发布:明石家秋刀鱼 知乎 编辑:程序博客网 时间:2024/04/28 02:56

最近公司一个客户要求查询其手里的一批电话号的余额。

而目前我们公司的移动接口已被收回,所以只能去移动网站抓取了。

由于公司觉得自己开发没必要,钱太少,所以听说就花了5000大洋外包出去做了。

今天听说已开发完了,刚好我最近又准备辞职正在交接,又没事做。所以就看看是否也去做个试试。

所以就在网上查了一下关于验证码的相关文档

依葫芦画瓢,终于将这个功能搞定。

中国移动网站的验证码很简单。


这也是我第一次做验证码识别。

现将代码记录下来,说不定以后用得到。由于只是练手,所以代码写得非常之烂,勿拍(偷笑)。

只是练习,所以特征码找得也不是很准哈,不过就现在这样识别率也能达95%。

通过这次发现验证码识别还是比较好玩的,接下来继续研究下,说不定以后能赚点小酒钱呢(害羞)。


<?php

/**
 * CAPTCHA recognizer for the 10086 (China Mobile) self-service site.
 *
 * Note: the CAPTCHA image has a border, and the last character may touch
 * the right-hand border line.
 */
class Valite
{
    /**
     * Feature codes used to identify characters.
     *
     * Each value is the bit string produced by getHec() for one character:
     * the character's pixel columns concatenated left to right, every column
     * read top to bottom ('1' = dark pixel, '0' = light pixel).
     *
     * Keys carrying a leading or trailing space are alternative codes for the
     * same letter; getHec() trims the key before emitting it. Letters mapped
     * to an empty string have no feature code and are never matched.
     *
     * NOTE(review): 'N' carries exactly the same code as 'H'. Because the
     * first matching entry wins, such a glyph is always reported as 'H'.
     *
     * @var array
     */
    public $char = array(
        'A'  => '00000011100000110011111000110011001100011001100110001100110010000111111111000001111111110000000000011000',
        'A ' => '0000001110000011001111100011001100110001100110011000110011001000011111111100000111111111000',
        'B'  => '11111111110001111111111000011000011000011000000110001100000011000111000011100001111111100000011111100000',
        'B ' => '1111111111000111111111100001100001100001100000011000110000001100011100001110000111111110000',
        'C'  => '0011111100000011111111000011100001110001100000011000110000001100011000000110000110000110000',
        'D'  => '00111111000000111111110000111000011100011000000110001100000011000011000011000011111111110001111111111000',
        'D ' => '0011111100000011111111000011100001110001100000011000110000001100001100001100001111111111000',
        'E'  => '0011111100000011111111000011101101110001100110011000110011001100001111100110000011110110000',
        'F'  => '11000000000001111111111000111111111100011000000000001100000000000',
        'G'  => '00111111000000111111110011111000011100111000000110011100000011001011000011001111111111111111111111111110',
        'G ' => '0011111100000011111111001111100001110011100000011001110000001100101100001100111111111111111',
        'H'  => '11111111110001111111111000001000000000001000000000001100000000000110000000000011111111110000111111111000',
        'H ' => '1111111111000111111111100000100000000000100000000000110000000000011000000000001111111111000',
        'I'  => '',
        'J'  => '00000000000010000000000001000000000000111111111111111111111111111',
        ' J' => '0000000000001000000000000111111111111111111111111111',
        'K'  => '111111111100011111111110000000100000000000111000000000110110000000110001100000110000011000010000000110000000000001000',
        'K ' => '1111111111000111111111100000001000000000001110000000001101100000001100011000001100000110000',
        ' K' => '11111111110001111111111000000010000000000011100000000011011000000011000110000011000001100001000000011000000000000100000000000000010000000000001000000000000111111111111111111111111111',
        'L'  => '',
        'M'  => '',
        'N'  => '11111111110001111111111000001000000000001000000000001100000000000110000000000011111111110000111111111000',
        'O'  => '',
        'P'  => '11111111111111111111111111011000011000011000000110001100000011000111000011100001111111100000011111100000',
        'P ' => '1111111111111111111111111101100001100001100000011000110000001100011100001110000111111110000',
        'Q'  => '00111111000000111111110000111000011100011000000110001100000011000011000011000011111111111111111111111111',
        'Q ' => '0011111100000011111111000011100001110001100000011000110000001100001100001100001111111111111',
        'R'  => '11111111110001111111111000011000000000011000000000001100000000000',
        'S'  => '011100011000011111000110001100110011000110011001100011000111110000110001110000',
        'T'  => '11000000000001111111110000111111111100011000000110001100000011000',
        'U'  => '11111111100001111111111000000000001100000000000110000000000010000000000010000011111111110001111111111000',
        'U ' => '1111111110000111111111100000000000110000000000011000000000001000000000001000001111111111000',
        'V'  => '110000000000011111000000000011111100000000001111100000000000110000000011111000001111110000011111000000001100000000000',
        'V ' => '11000000000001111100000000001111110000000000111110000000000011000000001111100000111111000001111100000000',
        'W'  => '',
        'X'  => '10000000010001110000111000011101111000000011110000000001111000000011110111000011100001110001000000001000',
        'X ' => '1000000001000111000011100001110111100000001111000000000111100000001111011100001110000111000',
        'Y'  => '110000000000011111000000000011111000001000001111111100000000111100000011111000001111100000011111000000001100000000000',
        'Y ' => '11000000000001111100000000001111100000100000111111110000000011110000001111100000111110000001111100000000',
        'Z'  => '1100000111000110000111100011000110110001100110011000110110001100011110000110001110000011000',
    );

    /**
     * Read the CAPTCHA image and binarize it into a two-dimensional array.
     *
     * The one-pixel border is skipped: rows 1 .. height-2 and columns
     * 1 .. width-1 are produced, indexed as $colors[$y][$x]. The last column
     * (width-1) is always written as '0' so that a character touching the
     * right border is still followed by a blank column.
     *
     * @param string $url Path or URL of the JPEG image.
     *
     * @return array Map of row => (column => '0'|'1'); '1' marks a dark pixel.
     *
     * @throws RuntimeException When the image cannot be loaded.
     */
    public function fixImg($url)
    {
        $img = imagecreatefromjpeg($url);
        if ($img === false) {
            throw new RuntimeException('Unable to load CAPTCHA image: ' . $url);
        }

        $width  = imagesx($img);
        $height = imagesy($img);
        $colors = array();

        for ($x = 1; $x < $width; ++$x) {
            for ($y = 1; $y < $height - 1; ++$y) {
                // The last character may touch the border, so the final
                // column is replaced by an artificial blank column.
                if ($x === $width - 1) {
                    $colors[$y][$x] = '0';
                    continue;
                }

                $rgb = imagecolorsforindex($img, imagecolorat($img, $x, $y));

                // A pixel counts as "ink" as soon as any channel is below 125.
                $isDark = $rgb['red'] < 125 || $rgb['green'] < 125 || $rgb['blue'] < 125;
                $colors[$y][$x] = $isDark ? '1' : '0';
            }
        }

        return $colors;
    }

    /**
     * Recognize the characters contained in a CAPTCHA image.
     *
     * The binarized image is scanned column by column. Runs of non-blank
     * columns are collected into segments, and every segment is compared
     * against the feature codes in $this->char. Segments without an exact
     * match are silently dropped.
     *
     * @param string $imgUrl Path or URL of the JPEG image.
     *
     * @return string The recognized characters, in left-to-right order.
     *
     * @throws RuntimeException When the image cannot be loaded.
     */
    public function getHec($imgUrl)
    {
        $pixels = $this->fixImg($imgUrl);

        $topLine = 6;  // blank rows above the characters
        // Exclusive lower bound of the scanned rows. Per the original author:
        // one extra row is read because J and Q extend lower than the other
        // glyphs, and the tail of J reaches under the preceding character.
        $len = 19;

        if (!isset($pixels[$topLine])) {
            // Image too small to contain the scanned band: nothing to read.
            return '';
        }

        // fixImg() numbers columns from 1, so index 0 below is always read as
        // a blank column and the padded final column is never visited; the
        // trailing segment is flushed unconditionally after the loop instead.
        $columnCount = count($pixels[$topLine]);

        $segments = array();
        $current  = '';

        for ($col = 0; $col < $columnCount; $col++) {
            $column = '';
            $blank  = true;

            for ($row = $topLine; $row < $len; $row++) {
                $bit = isset($pixels[$row][$col]) ? $pixels[$row][$col] : '';
                if ($bit === '1') {
                    $blank = false;
                }
                $column .= $bit;
            }

            if ($blank) {
                // A blank column ends the current segment; segments no wider
                // than a single column are discarded as noise.
                if (strlen($current) > $len - $topLine) {
                    $segments[] = $current;
                }
                $current = '';
            } else {
                $current .= $column;
            }
        }
        $segments[] = $current;

        $result = '';
        foreach ($segments as $segment) {
            if ($segment === '') {
                continue;
            }
            // Strict search returns the first key whose code matches exactly.
            $key = array_search($segment, $this->char, true);
            if ($key !== false) {
                $result .= trim($key);
            }
        }

        return $result;
    }
}

/**
 * Perform one HTTP request with the shared referer and cookie file.
 *
 * @param string      $url         Target URL.
 * @param string      $referer     Value of the Referer header.
 * @param string      $cookieFile  File the cookies are read from.
 * @param bool        $saveCookies Whether received cookies are written back to $cookieFile.
 * @param string|null $postData    Encoded body; when not null the request is sent as POST.
 *
 * @return string The response body.
 *
 * @throws RuntimeException When the request fails.
 */
function valiteHttpRequest($url, $referer, $cookieFile, $saveCookies = false, $postData = null)
{
    $curl = curl_init($url);
    if ($curl === false) {
        throw new RuntimeException('Unable to initialise cURL for ' . $url);
    }

    curl_setopt($curl, CURLOPT_REFERER, $referer);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($curl, CURLOPT_COOKIEFILE, $cookieFile);
    if ($saveCookies) {
        curl_setopt($curl, CURLOPT_COOKIEJAR, $cookieFile);
    }
    if ($postData !== null) {
        curl_setopt($curl, CURLOPT_POST, 1);
        curl_setopt($curl, CURLOPT_POSTFIELDS, $postData);
    }

    $body  = curl_exec($curl);
    $error = curl_error($curl);
    curl_close($curl);

    if ($body === false) {
        throw new RuntimeException('Request to ' . $url . ' failed: ' . $error);
    }

    return $body;
}

$referer = "http://service.cq.10086.cn/app?service=page/newLogin.login";
$cookie  = dirname(__FILE__) . "/valid_cookie.tmp";

try {
    // Step 1: download the CAPTCHA image; this request also stores the cookies.
    $url  = "http://service.cq.10086.cn/icsimage?mode=validate&width=51&height=20&temp=";
    $data = valiteHttpRequest($url, $referer, $cookie, true);

    // Save the image locally so that GD can read it.
    if (file_put_contents("valid.jpg", $data) === false) {
        throw new RuntimeException('Unable to write valid.jpg');
    }

    // Step 2: recognize the CAPTCHA.
    $valid = new Valite();
    $code  = $valid->getHec('valid.jpg');

    $tel = "13888888888"; // phone number to query
    $pwd = "666666";      // service password of that phone

    // Step 3: log in. http_build_query() URL-encodes every value, so the
    // "clogin" value is encoded as a whole, exactly as the original did.
    $postData = http_build_query(
        array(
            'EFFICACY_CODE'       => $code,
            'Form0'               => 'blogin',
            'SERIAL_NUMBER'       => $tel,
            'USER_PASSSMS'        => '',
            'USER_PASSWD'         => $pwd,
            'USER_PASSWD_SELECT'  => '1',
            'clogin'              => 'on&service=direct/1/newLogin.login/$Form',
            'sp'                  => 'S0',
        ),
        '',
        '&'
    );
    $data = valiteHttpRequest(
        'http://service.cq.10086.cn/app?service=page/newLogin.login&listener=login',
        $referer,
        $cookie,
        false,
        $postData
    );

    // Step 4: fetch the balance page and extract the amount.
    $url = "http://service.cq.10086.cn/app?service=page/feequery.QueryBalance&listener=initPage&subSysCode=E003";
    $src = valiteHttpRequest($url, $referer, $cookie);

    // The page is converted from GBK to UTF-8 before matching.
    $page = iconv('gbk', 'utf-8', $src);
    if ($page === false
        || preg_match('/<td style="text\-align:center;">([0-9\.]+)元<\/td>/', $page, $res) !== 1
    ) {
        throw new RuntimeException('Balance not found in the response (login may have failed)');
    }

    print_r("电话号码:".$tel."<br />");
    print_r("当前余额:".$res[1].'元');
} catch (Exception $e) {
    exit('Balance query failed: ' . $e->getMessage());
}


原创粉丝点击