php爬取安居客小区数据

来源:互联网 发布:淘宝卖家新增运费模板 编辑:程序博客网 时间:2024/04/28 02:48
ajk.php


<?php
/**
 * Created by PhpStorm.
 * User: tobosu02
 * Date: 16-4-23
 * Time: 4:07 PM
 *
 * Fetches one page of the Anjuke Shenzhen community listing and echoes it
 * verbatim so that the markup emitted after this PHP section can read the
 * listing out of the DOM.
 *
 * The page number is carried between requests in the "tmp" cookie.
 */

// First page requested when the browser has no usable "tmp" cookie yet.
// NOTE(review): the original code seeded the cookie with 2, not 1 - kept as is.
$defaultPage = 2;

// setcookie() only queues a response header; it does NOT populate $_COOKIE
// for the current request. Resolve the page number into a local variable so
// the very first request builds a valid URL instead of ".../community/p/".
// The cast to int also keeps arbitrary cookie text out of the URL.
$page = isset($_COOKIE['tmp']) ? (int) $_COOKIE['tmp'] : 0;
if ($page < 1) {
    $page = $defaultPage;
    setcookie('tmp', (string) $page);
}

$url = "http://shenzhen.anjuke.com/community/p{$page}/";
$content = file_get_contents($url);

// file_get_contents() returns false on failure; report it explicitly rather
// than echoing an empty page.
if ($content === false) {
    http_response_code(502);
    echo 'Failed to fetch ' . htmlspecialchars($url, ENT_QUOTES, 'UTF-8');
    exit;
}

echo $content;
?>
<script src="http://apps.bdimg.com/libs/jquery/2.1.4/jquery.min.js"></script>
<script src="http://apps.bdimg.com/libs/jquery.cookie/1.4.1/jquery.cookie.js"></script>
<script>
    /*
     * Reads every community entry (".li-itemmod") out of the page echoed
     * above, serialises it and POSTs it to do.php.
     *
     * Wire format (unchanged): records are joined with '^', the six fields of
     * a record (title, pic, district, address, l1, l2) are joined with ','.
     *
     * jQuery and jquery.cookie are loaded BEFORE this script so that `$` and
     * `$.cookie` are guaranteed to exist when it runs.
     */
    $(function () {
        // The original code kept exactly 9 characters of each coordinate.
        var COORD_MAX_LENGTH = 9;

        // Strips the two delimiter characters from a field so a comma or
        // caret inside a title/address cannot shift columns on the server.
        function cleanField(value) {
            return String(value == null ? '' : value).replace(/[,^]/g, ' ');
        }

        // innerText of the first matched element, or '' when nothing matched.
        function textOf($el) {
            return $el.length ? ($el[0].innerText || '') : '';
        }

        // Value of the query parameter `key` inside `href`, cut at the next
        // '&' and limited to COORD_MAX_LENGTH characters; '' when absent.
        function coordFrom(href, key) {
            var marker = key + '=';
            var start = href.indexOf(marker);
            if (start === -1) {
                return '';
            }
            var value = href.substring(start + marker.length).split('&')[0];
            return value.substring(0, COORD_MAX_LENGTH);
        }

        var records = [];

        // Look everything up relative to its own list item; indexing four
        // independent global selections misaligns rows as soon as one item
        // lacks an image or a link.
        $('.li-itemmod').each(function () {
            var $item = $(this);
            var $title = $item.find('.li-info h3').first();
            if (!$title.length) {
                return; // not a community entry
            }

            var title = textOf($title);                               // community name
            var pic = $item.find('a img').first().prop('src') || '';  // picture URL
            var rawAddress = textOf($item.find('.li-info address').first());
            // Link next to the tag <span>; its href carries l1=/l2=.
            var href = $item.find('.li-info .bot-tag span').next()
                .filter('[href]').first().prop('href') || '';

            // Address text looks like "[district]street address".
            var open = rawAddress.indexOf('[');
            var close = rawAddress.indexOf(']');
            var district = '';
            var address = rawAddress;
            if (open !== -1 && close > open) {
                district = rawAddress.substring(open + 1, close);
                address = rawAddress.substring(close + 1);
            }

            records.push([
                cleanField(title),
                cleanField(pic),
                cleanField(district),
                cleanField(address),
                cleanField(coordFrom(href, 'l1')),
                cleanField(coordFrom(href, 'l2'))
            ].join(','));
        });

        $.ajax({
            type: 'POST',
            url: 'do.php',
            data: {s: records.join('^')},
            dataType: 'json',
            success: function (data) {
                if (data && data.code > 0) {
                    // Advance the page counter kept in the "tmp" cookie.
                    var nextPage = (parseInt($.cookie('tmp'), 10) || 1) + 1;
                    $.cookie('tmp', nextPage);
                    // Pass a function: the original passed the result of an
                    // assignment, which navigated immediately with no delay.
                    setTimeout(function () {
                        window.location.href = window.location.href;
                    }, 2000);
                } else {
                    // Server signalled the end of the crawl.
                    window.location.href = 'http://www.baidu.com';
                }
            },
            error: function (xhr, status) {
                if (window.console && console.error) {
                    console.error('POST to do.php failed: ' + status);
                }
            }
        });
    });
</script>


do.php




<?php
/**
 * Receives the scraped listing posted by the crawler page in the "s" field
 * and inserts one row per record into the `anjuke` table.
 *
 * Payload format: records separated by '^'; each record holds six
 * comma-separated fields in the order title, pic, district, address, l1, l2.
 *
 * Responds with JSON: {"code":1,"msg":"go on"} when the payload was long
 * enough to be processed, {"code":0,"msg":"end"} otherwise.
 */
require_once 'Mysql.class.php';

$conf = array();
$conf['host'] = '127.0.0.1';
$conf['port'] = '3306';
$conf['user'] = 'root';
$conf['passwd'] = '';
$conf['dbname'] = 'test';
$obj = new AllMysql($conf);

// Number of fields every record must carry.
$fieldCount = 6;

// "s" may be missing or (with s[]=...) an array; treat both as empty input.
$payload = (isset($_POST['s']) && is_string($_POST['s'])) ? $_POST['s'] : '';

// A payload of 50 bytes or fewer is treated as "no more data".
if (strlen($payload) > 50) {
    foreach (explode('^', $payload) as $record) {
        $fields = explode(',', $record);

        // explode() never returns an empty array, so check the field count
        // instead; a short record would otherwise hit undefined offsets.
        if (count($fields) < $fieldCount) {
            continue;
        }

        // Use a dedicated variable: the original reassigned $arr, the array
        // being iterated, inside its own loop.
        $row = array(
            'title' => $fields[0],
            'pic' => $fields[1],
            'district' => $fields[2],
            'address' => $fields[3],
            'l1' => $fields[4],
            'l2' => $fields[5],
        );
        $obj->insert('anjuke', $row);
    }
    $ret = array(
        'code' => 1,
        'msg' => 'go on',
    );
} else {
    $ret = array(
        'code' => 0,
        'msg' => 'end',
    );
}

echo json_encode($ret);
exit;


?>


sub.php


<?php
/**
 * Created by PhpStorm.
 * User: tobosu02
 * Date: 16-6-8
 * Time: 10:42 AM
 *
 * One-off post-processing script: joins `anjuke_copy` with `district` on the
 * district name and inserts every joined row into `anjuke_copy2`.
 */


require_once 'Mysql.class.php';
set_time_limit(60 * 60); // allow up to one hour of run time

$conf = array();
$conf['host'] = '127.0.0.1';
$conf['port'] = '3306';
$conf['user'] = 'root';
$conf['passwd'] = '';
$conf['dbname'] = 'test';
$obj = new AllMysql($conf);


// Disabled earlier step, kept for reference: it split `district` on '-' and
// stored the first part as `districtname`.
//$list = $obj->select('anjuke_copy');
//var_dump($list);
/*foreach($list as &$v){
    $arr = explode('-',$v['district']);
    $v['districtname'] = $arr[0];
    unset($v['district']);
    $obj->insert('anjuke_copy',$v);
}*/


$sql = "SELECT a.*,d.CityID as cityid,d.DistrictID as districtid,d.pinyin FROM anjuke_copy a inner JOIN district d ON a.districtname=d.DistrictName";
$list = $obj->doSql($sql);


// Only iterate when the query actually produced something iterable.
// NOTE(review): the return type of AllMysql::doSql() is not visible here.
if (is_array($list) || $list instanceof Traversable) {
    // Iterate by value: rows are only read, and a by-reference loop variable
    // would stay bound to the last element after the loop.
    foreach ($list as $row) {
        $obj->insert('anjuke_copy2', $row);
    }
}



















0 1