NB的第三方博客平台自动登录机

来源:互联网 发布:visio软件下载 编辑:程序博客网 时间:2024/05/01 16:21

下面是两个主体函数:第一个是封装 curl 请求的函数,直接引用即可;

第二个是主函数 platLogin。

使用方法:self::platLogin(登录URL, 用户名, 密码);(登录URL 和密码在函数内部会先经过 urldecode 解码)



//CURL数据传输

    public static function curl($url,array $params = array()){
        $init = curl_init();
        curl_setopt($init,CURLOPT_URL,$url);
        curl_setopt($init,CURLOPT_RETURNTRANSFER,true);
        curl_setopt ( $init, CURLOPT_SSL_VERIFYPEER, 0 );
        curl_setopt ( $init, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2.2) Gecko/20100316 Firefox/3.6.2 (.NET CLR 3.5.30729)" );
        //curl_setopt ( $init, CURLOPT_HEADER, false );
        //curl_setopt ( $init, CURLOPT_NOBODY, false );
        curl_setopt ( $init, CURLOPT_FOLLOWLOCATION, 1 ); // 使用自动跳转
        if (count($params)){
            curl_setopt($init,CURLOPT_POST,true);
            curl_setopt($init,CURLOPT_POSTFIELDS,$params);
        }
        $result = curl_exec($init);
        if (curl_errno ( $init )) {
            echo '[**vLogin Error**]' . ' - getCookieHtml: ' . curl_error ( $init );
            //return 'ERROR';
            return '';
        }
        curl_close($init);
        return $result;

    }


//第三方平台自动登录
    public static function platLogin($loginUrl,$username,$password)
    {
        $loginUrl = urldecode ( $loginUrl ); //对url进行解码
        $username = $username;
        $password = urldecode ( $password );
        /*缓存登录页+登录规则*/
        $cache = Cache::instance();
        $loginCacheTime = 3600 * 24 * 20;
        $filer = array(
            'http://','www.'
        );
        $cacheFileSubName = eregi_replace('[^a-zA-Z|0-9|-|.]', '-', trim(str_replace($filer, '', $loginUrl), '/'));
        $loginCache = $cache->get('loginCache.' . $cacheFileSubName);
        if ($loginCache == '')
        {
            //$loginPageCache[loginPageHtml] = file_get_contents($loginUrl);
            //用curl提升抓取速率
            $strPageHtml = self::curl($loginUrl);
            if (trim($strPageHtml) != '' && $strPageHtml != false)
            {
                $strPageHtml = str_replace(array(
                    "\n",
                    "\r\n",
                    "\r"
                ), '', $strPageHtml);
                /*抓取登录规则*/
                //wp: /<form name=[\"|']loginform[\"|'](.*?)<\/form>/
                //LT: /<form name=[\"|']loginForm[\"|'](.*?)<\/form>/
                //BS: /<form action=[\"|']login.php[\"|'](.*?)<\/form>/
                //统一:/<form(.*?)<\/form>/ 循环抓取,再判断哪个里有login
                preg_match_all("/<form(.*?)<\/form>/", $strPageHtml, $aryForm);
                //print_r($aryForm); die();
                $aryLoginFrmStr = array(
                    '1'=>'login',
                    '2'=>'password'
                );
                foreach ($aryLoginFrmStr as $strLoginFrmStr)
                {
                    $str = $strLoginFrmStr;
                    foreach ($aryForm[0] as $key => $strFromHtml)
                    {
                        //[0][0],[0][1],[0][2],...一个页面多个表单
                        //找到其中"form有name"和"form中包含login字符串"的表单
                        //$str = 'login';
                        $exist = preg_match("/{$str}/", $strFromHtml);
                        if ($exist)
                        {
                            //echo "exist"; die(); //存在
                            $formName = 'forms[' . $key . ']'; //为了避免表单无name属性和一页面多表单情况
                            $actionUrl = '';
                            if ($formName != '')
                            {
                                //表单名存在,则抓取actionUrl
                                //preg_match("/action=[\"|']+(.*?)[\"|']+/",str_replace("\n",'',$aryFormHeader[0]),$aryActionUrl);
                                preg_match("/action=[\"|']{1}(.*?)[\"|']{1}/", $strFromHtml, $aryActionUrl);
                                //print_r($aryActionUrl); die();
                                if (substr($aryActionUrl[1], 0, 7) == 'http://' || substr($aryActionUrl[1], 0, 8) == 'https://')
                                {
                                    $actionUrl = $aryActionUrl[1];
                                }elseif ($aryActionUrl[1] == '' || $aryActionUrl[1] == '#')
                                {
                                    $actionUrl = $loginUrl;
                                }else
                                {
                                    //$aryActionUrl[1] => admin.php
                                    /*
                            用户可能会填写这样的地址 http://www.auto-blog.fr 则会得到http://login.php
                            为了有需用户必须填写为 http://www.auto-blog.fr/下面程序做判断
                            :如果[2]为做最后一个数组,则表示是没有'/',则数组不用-1
                            */
                                    $ary = explode('/', $loginUrl);
                                    $aryCNT = count($ary);
                                    if ($aryCNT == 3)
                                        $intLess = 0;
                                    else
                                        $intLess = 1;
                                    for($i = 0;$i < $aryCNT - $intLess;$i ++)
                                    {
                                        $actionUrl .= $ary[$i] . '/';
                                    }
                                    //$actionUrl .= str_replace('/','',$aryActionUrl[1]);
                                    $actionUrl .= trim($aryActionUrl[1], '/');
                                    //http://www.bigadda.com/login/login
                                //$actionUrl = str_replace($aryActionUrl[1],'',$actionUrl) . trim($aryActionUrl[1],'/');
                                }
                                //echo $actionUrl;
                                //抓取表单项name
                                //preg_match_all("/<input(.*?)name=[\"|']+(.*?)[\"|']+(.*?)>/",$strFromHtml,$aryFormItem);
                                preg_match_all("/<(input|INPUT)(.*?)name=[\"|']*(.*?)[\"|'| ]+(.*?)>/", $strFromHtml, $aryFormItem);
                                //print_r($aryFormItem); die();
                                foreach ($aryFormItem[0] as $key => $value)
                                {
                                    //$passwordType = "password";
                                    //$isPassword=preg_match("/\b{$passwordType}\b/",$value);
                                    $passwordType = "type=password";
                                    $isPassword = preg_match("/\b{$passwordType}\b/", str_replace(array(
                                        "'",
                                        '"'
                                    ), '', $value));
                                    if ($isPassword)
                                    {
                                        //是密码项
                                        //password紧接上面的一定的username
                                        $formPasswordItemName = $aryFormItem[3][$key];
                                        $formUsernameItemName = $aryFormItem[3][$key - 1];
                                        /*当name值为数组时,JS无法给文本框赋值,所以添加ID属性,通过ID赋值*/
                                        //用户名
                                        $str = preg_replace("/(.*?)id=[\"|']+(.*?)[\"|']+(.*?)/i", "$1$3", $aryFormItem[0][$key - 1]);
                                        $strFromUsernameItem = str_replace(array(
                                            '<input',
                                            '<INPUT'
                                        ), '<input id="username_js"', $str);
                                        $strPageHtml = str_replace($aryFormItem[0][$key - 1], $strFromUsernameItem, $strPageHtml);
                                        //密码
                                        $str = preg_replace("/(.*?)id=[\"|']+(.*?)[\"|']+(.*?)/i", "$1$3", $aryFormItem[0][$key]);
                                        $strFromPasswordItem = str_replace(array(
                                            '<input',
                                            '<INPUT'
                                        ), '<input id="password_js"', $str);
                                        $strPageHtml = str_replace($aryFormItem[0][$key], $strFromPasswordItem, $strPageHtml);
                                        //好像action的JS也不能顺利赋值,所以将action=直接程序替换
                                        $strPageHtml = str_replace($aryActionUrl[0], 'action="' . $actionUrl . '"', $strPageHtml);
                                        /*插入提交按钮的ID属性*/
                                        //preg_match("/<input [a-zA-Z0-9_|\"|'| |=|:|#|    |-]*type=[\"|']*submit[\"|']*(.*?)>/",$strFromHtml,$aryFromSumbit);
                                        //使用排除字符串方法的正则  ((?!Name:).)*  排除Name:字符串!!
                                        preg_match("/<(input|INPUT)((?!>).)*type=[\"|']*submit[\"|']*(.*?)>/", $strFromHtml, $aryFromSumbit);
                                        //var_dump($aryFromSumbit);
                                        if ($aryFromSumbit[0] != '')
                                        {
                                            $submitJS = 'YES';
                                            $str = preg_replace("/(.*?)id=[\"|']+(.*?)[\"|']+(.*?)/i", "$1$3", $aryFromSumbit[0]);
                                            //echo $str;
                                            $strFromSubmitItem = str_replace(array(
                                                '<input',
                                                '<INPUT'
                                            ), '<input id="submit_js"', $str);
                                            //echo $strFromSubmitItem; die();
                                            $strPageHtml = str_replace($aryFromSumbit[0], $strFromSubmitItem, $strPageHtml);
                                            //echo $strPageHtml; die();
                                        }else
                                        {
                                            $submitJS = 'NO';
                                        }
                                        //去掉页面JS提升加载速度
                                        $strPageHtml = preg_replace("/<script(.*?)<\/script>/i", "", $strPageHtml);
                                        $strPageHtml = preg_replace("/<link(.*?)>/i", "", $strPageHtml);
                                        break;
                                    }
                                }
                                if ($isPassword)
                                {
                                    break; //找到后再退出第一重foreach
                                }
                            }
                        }
                    }
                    if ($isPassword)
                    {
                        break; //找到后再退出foreach($aryLoginFrmStr)
                    }
                }
                //echo $formName,'|',$actionUrl,'|',$formUsernameItemName,'|',$formPasswordItemName; die();
                $loginCache[loginPageHtml] = $strPageHtml;
                $loginCache[formName] = $formName;
                $loginCache[actionUrl] = $actionUrl;
                $loginCache[formUsernameItemName] = $formUsernameItemName;
                $loginCache[formPasswordItemName] = $formPasswordItemName;
                $loginCache[submitJS] = $submitJS;
                $cache->set('loginCache.' . $cacheFileSubName, $loginCache, $loginCacheTime);
            }
        }
        //$strPageHtml = str_replace("\'","'",$loginCache[loginPageHtml]);
        $strPageHtml = $loginCache[loginPageHtml];
        $formName = $loginCache[formName];
        $actionUrl = $loginCache[actionUrl];
        $formUsernameItemName = $loginCache[formUsernameItemName];
        $formPasswordItemName = $loginCache[formPasswordItemName];
        $submitJS = $loginCache[submitJS];
        //echo $formName,'|',$actionUrl,'|',$formUsernameItemName,'|',$formPasswordItemName; die();
        if ($formName != '' && $actionUrl != '' && $formUsernameItemName != '' && $formPasswordItemName != '')
        {
            //自动登录
            echo $strPageHtml;
            /*
    echo '
    <script>
    document.body.style.display = "none";
    document.'.$formName.'.action="'.$actionUrl.'";
    document.'.$formName.'.'.$formUsernameItemName.'.value="'.$username.'";
    document.'.$formName.'.'.$formPasswordItemName.'.value="'.$password.'";
    document.'.$formName.'.submit();
    </script>
     ';
     */
            //document.forms[1].submit(); //有时不能自动提交,出现JS错误:对象不支持此属性或方法
            //document.getElementById("submit").click();//自动模拟点击按钮[完全可以]
            if ($submitJS == 'YES')
            {
                echo '
        <script>
        document.body.style.display = "none";
        document.getElementById("username_js").value="' . $username . '";
        document.getElementById("password_js").value="' . $password . '";
        document.getElementById("submit_js").click();
        </script>
         ';
                //id=submit_js是由程序处理添加到页面HTML中的!
            }else
            {
                echo '
        <script>
        document.body.style.display = "none";
        document.getElementById("username_js").value="' . $username . '";
        document.getElementById("password_js").value="' . $password . '";
        document.' . $formName . '.submit();
        </script>
         ';
            }
        }
        //未提交跳转则直接打开(因为有时没有登录且还在oss.ketai-inc.com页)
        echo '<script language="javascript">location.href="' . $loginUrl . '";</script>';
    }