【python 爬虫】全国失信被执行人名单查询功能

来源:互联网 发布:vmware中linux上网 编辑:程序博客网 时间:2024/04/27 13:29

一、需求说明
利用百度提供的查询接口,实现全国失信被执行人名单查询功能:输入姓名,即可查询该姓名是否出现在全国失信被执行人名单中。

这里写图片描述

二、Python 实现

版本1:

# -*- coding: utf-8 -*-
"""Look a name up in the national list of dishonest judgment debtors.

Pages through a Baidu search API endpoint and collects the ``iname`` and
``cardNum`` fields of every returned record into the module-level lists
``iname`` and ``icard``.  When run as a script, the collected rows are
de-duplicated and written to an Excel workbook.

The code is written to run unchanged on Python 2.7 and Python 3.
"""
import json
import sys
import time

import pandas as pd

try:  # Python 3
    from urllib.parse import quote
except ImportError:  # Python 2
    from urllib import quote

if sys.version_info[0] == 2:
    # Python 2 only: make implicit str/unicode conversions use UTF-8 so that
    # printing Chinese text does not fail when stdout is not a terminal.
    reload(sys)  # noqa: F821 -- builtin on Python 2
    sys.setdefaultencoding('utf-8')

# Reference point for the "total elapsed time" message printed by the script.
time1 = time.time()

# Module-level accumulators filled by person_executed(); the two lists are
# always appended to together, so index i in both refers to the same record.
iname = []
icard = []

_PAGE_SIZE = 10  # the request asks for rn=10 rows, so pn advances by 10
_REQUEST_TIMEOUT = 10  # seconds; without it a stalled connection hangs forever
_API_URL = (
    "https://sp0.baidu.com/8aQDcjqpAAV3otqbppnN2DJv/api.php?resource_id=6899"
    "&query=%E5%A4%B1%E4%BF%A1%E8%A2%AB%E6%89%A7%E8%A1%8C%E4%BA%BA%E5%90%8D%E5%8D%95"
    "&cardNum=&"
    "iname={name}"
    "&areaName="
    "&pn={offset}"
    "&rn=10"
    "&ie=utf-8&oe=utf-8&format=json"
)


def _encode_name(name):
    """Return *name* percent-encoded as UTF-8 for use in the query string."""
    text_type = type(u"")
    if not isinstance(name, (bytes, text_type)):
        name = str(name)
    if isinstance(name, text_type):
        name = name.encode("utf-8")
    return quote(name)


def _fetch_page(url):
    """Download *url* and return the raw response body."""
    # Imported here so the module can be imported (and its parsing logic
    # exercised) on machines where ``requests`` is not installed.
    import requests

    return requests.get(url, timeout=_REQUEST_TIMEOUT).content


def _store_records(payload):
    """Append every record found in *payload*; return how many were stored.

    *payload* is the decoded JSON response: ``payload['data']`` is a list of
    groups, each holding its records under ``'result'``.
    """
    stored = 0
    for group in payload['data']:
        for record in group['result']:
            # Read both fields before appending so a malformed record cannot
            # leave ``iname`` and ``icard`` with different lengths.
            person, card = record['iname'], record['cardNum']
            print(u"%s %s" % (person, card))
            iname.append(person)
            icard.append(card)
            stored += 1
    return stored


def person_executed(name, fetch=None, max_pages=30):
    """Collect all list entries matching *name* into ``iname`` / ``icard``.

    Args:
        name: The person's name to search for.
        fetch: Optional callable ``fetch(url) -> str/bytes`` returning the
            JSON response body.  Defaults to an HTTP GET via ``requests``.
        max_pages: Upper bound on the number of result pages requested.

    A page that cannot be downloaded or parsed is reported on stderr and
    skipped; paging stops early at the first page that contains no records.
    """
    if fetch is None:
        fetch = _fetch_page
    encoded_name = _encode_name(name)

    for page in range(max_pages):
        url = _API_URL.format(name=encoded_name, offset=page * _PAGE_SIZE)
        try:
            stored = _store_records(json.loads(fetch(url)))
        except Exception as exc:  # best effort: one bad page must not abort
            sys.stderr.write("page %d skipped: %r\n" % (page, exc))
            continue
        if not stored:
            # An empty page means the result set is exhausted.
            break


if __name__ == '__main__':
    name = "郭家松"
    person_executed(name)
    print(len(iname))

    # Organise the collected columns into a data frame.
    data = pd.DataFrame({"name": iname, "IDCard": icard})

    # Drop duplicate rows.
    data1 = data.drop_duplicates()
    print(data1)
    print(len(data1))

    # Write the result to Excel.  The former ``encoding='gbk'`` argument is
    # omitted: it is not accepted by current pandas releases.
    data1.to_excel("F:\\iname_icard_query.xlsx", header=True, index=False)

    time2 = time.time()
    print(u'ok,爬虫结束!')
    print(u'总共耗时:' + str(time2 - time1) + 's')

三、效果展示

"D:\Program Files\Python27\python.exe" D:/PycharmProjects/learn2017/全国失信被执行人查询.py郭家松 34122319790****5119郭家松 32032119881****2419郭家松 32032119881****24193                IDCard name0  34122319790****5119  郭家松1  32032119881****2419  郭家松2ok,爬虫结束!总共耗时:7.72000002861sProcess finished with exit code 0

版本2:

# -*- coding: utf-8 -*-
"""Look a name up in the national list of dishonest judgment debtors.

Pages through a Baidu search API endpoint and collects nine fields of every
returned record into module-level lists (``iname``, ``icard``,
``courtName``, ...).  When run as a script, the collected rows are
de-duplicated and written to an Excel workbook.

The code is written to run unchanged on Python 2.7 and Python 3.
"""
import json
import sys
import time

import pandas as pd

try:  # Python 3
    from urllib.parse import quote
except ImportError:  # Python 2
    from urllib import quote

if sys.version_info[0] == 2:
    # Python 2 only: make implicit str/unicode conversions use UTF-8 so that
    # printing Chinese text does not fail when stdout is not a terminal.
    reload(sys)  # noqa: F821 -- builtin on Python 2
    sys.setdefaultencoding('utf-8')

# Reference point for the "total elapsed time" message printed by the script.
time1 = time.time()

# Module-level accumulators filled by person_executed().  All lists are
# appended to together, so index i in each refers to the same record.
iname = []
icard = []
courtName = []
areaName = []
caseCode = []
duty = []
performance = []
disruptTypeName = []
publishDate = []

# (key in the API record, list that receives its value), in print order.
_RECORD_FIELDS = (
    ('iname', iname),
    ('cardNum', icard),
    ('courtName', courtName),
    ('areaName', areaName),
    ('caseCode', caseCode),
    ('duty', duty),
    ('performance', performance),
    ('disruptTypeName', disruptTypeName),
    ('publishDate', publishDate),
)

_PAGE_SIZE = 10  # the request asks for rn=10 rows, so pn advances by 10
_REQUEST_TIMEOUT = 10  # seconds; without it a stalled connection hangs forever
_API_URL = (
    "https://sp0.baidu.com/8aQDcjqpAAV3otqbppnN2DJv/api.php?resource_id=6899"
    "&query=%E5%A4%B1%E4%BF%A1%E8%A2%AB%E6%89%A7%E8%A1%8C%E4%BA%BA%E5%90%8D%E5%8D%95"
    "&cardNum=&"
    "iname={name}"
    "&areaName="
    "&pn={offset}"
    "&rn=10"
    "&ie=utf-8&oe=utf-8&format=json"
)


def _encode_name(name):
    """Return *name* percent-encoded as UTF-8 for use in the query string."""
    text_type = type(u"")
    if not isinstance(name, (bytes, text_type)):
        name = str(name)
    if isinstance(name, text_type):
        name = name.encode("utf-8")
    return quote(name)


def _fetch_page(url):
    """Download *url* and return the raw response body."""
    # Imported here so the module can be imported (and its parsing logic
    # exercised) on machines where ``requests`` is not installed.
    import requests

    return requests.get(url, timeout=_REQUEST_TIMEOUT).content


def _store_records(payload):
    """Append every record found in *payload*; return how many were stored.

    *payload* is the decoded JSON response: ``payload['data']`` is a list of
    groups, each holding its records under ``'result'``.
    """
    stored = 0
    for group in payload['data']:
        for record in group['result']:
            # Read every field before appending anything so a malformed
            # record cannot leave the column lists with different lengths.
            values = [record[key] for key, _ in _RECORD_FIELDS]
            print(u" ".join(u"%s" % value for value in values))
            for (_, column), value in zip(_RECORD_FIELDS, values):
                column.append(value)
            stored += 1
    return stored


def person_executed(name, fetch=None, max_pages=30):
    """Collect all list entries matching *name* into the module-level lists.

    Args:
        name: The person's name to search for.
        fetch: Optional callable ``fetch(url) -> str/bytes`` returning the
            JSON response body.  Defaults to an HTTP GET via ``requests``.
        max_pages: Upper bound on the number of result pages requested.

    A page that cannot be downloaded or parsed is reported on stderr and
    skipped; paging stops early at the first page that contains no records.
    """
    if fetch is None:
        fetch = _fetch_page
    encoded_name = _encode_name(name)

    for page in range(max_pages):
        url = _API_URL.format(name=encoded_name, offset=page * _PAGE_SIZE)
        try:
            stored = _store_records(json.loads(fetch(url)))
        except Exception as exc:  # best effort: one bad page must not abort
            sys.stderr.write("page %d skipped: %r\n" % (page, exc))
            continue
        if not stored:
            # An empty page means the result set is exhausted.
            break


if __name__ == '__main__':
    name = "郭家松"
    person_executed(name)
    print(len(iname))

    # Organise the collected columns into a data frame.
    detail_data = pd.DataFrame({
        "name": iname,
        "IDCard": icard,
        "courtName": courtName,
        "areaName": areaName,
        "caseCode": caseCode,
        "duty": duty,
        "performance": performance,
        "disruptTypeName": disruptTypeName,
        "publishDate": publishDate,
    })

    # Drop duplicate rows.
    detail_data1 = detail_data.drop_duplicates()

    # Write the result to Excel.  The former ``encoding='gbk'`` argument is
    # omitted: it is not accepted by current pandas releases.
    detail_data1.to_excel("F:\\iname_icard_query.xlsx", header=True, index=False)

    time2 = time.time()
    print(u'ok,爬虫结束!')
    print(u'总共耗时:' + str(time2 - time1) + 's')
原创粉丝点击