抓取数据保存为 JSON 文件,再将 JSON 转换为 CSV

来源:互联网 发布:iphone软件下载 编辑:程序博客网 时间:2024/06/02 05:30

    • 抓取数据到json
    • 把json导入csv

抓取数据到json

# coding: utf-8
"""Scrape one position-listing page from hr.tencent.com into a JSON file.

When run as a script, the JSON file is written to ``tencent.json`` and then
handed to ``json2csv.run`` for conversion to CSV.
"""
import contextlib
import json
import os
import time
import urllib

try:
    import urllib2
except ImportError:  # Python 3: urllib2's API lives in urllib.request
    import urllib.request as urllib2

from bs4 import BeautifulSoup

import json2csv

BASE_URL = "http://hr.tencent.com/"
LISTING_PAGE = "position.php?&start=10#a"


def tencent(path):
    """Download the listing page and dump its table rows to *path* as JSON.

    Each ``<tr class="even">`` / ``<tr class="odd">`` row becomes one dict
    with the keys ``name``, ``detailLink``, ``catalog``, ``recruitNumber``,
    ``workLocation`` and ``publishTime``. The list of dicts is written as a
    single UTF-8 encoded JSON array.

    Args:
        path: Destination file name for the JSON output.

    Returns:
        Always ``1`` (kept for callers that check the result).
    """
    request = urllib2.Request(BASE_URL + LISTING_PAGE)
    # closing() releases the connection on both Python 2 and Python 3.
    with contextlib.closing(urllib2.urlopen(request)) as response:
        page = response.read()

    soup = BeautifulSoup(page, "lxml")
    # CSS selectors: all "even" rows first, followed by all "odd" rows.
    table_rows = soup.select('tr[class="even"]') + soup.select('tr[class="odd"]')

    items = []
    for table_row in table_rows:
        cells = table_row.select('td')
        link = table_row.select("td a")[0]
        items.append({
            "name": link.get_text(),
            # The href is relative, so prefix it with the site root.
            "detailLink": BASE_URL + link.attrs["href"],
            "catalog": cells[1].get_text(),
            "recruitNumber": cells[2].get_text(),
            # Previously extracted but never stored, so the column was lost.
            "workLocation": cells[3].get_text(),
            "publishTime": cells[4].get_text(),
        })

    payload = json.dumps(items, ensure_ascii=False).encode("utf-8")
    # Open the file only once the payload is ready, in binary mode because
    # the payload is already encoded; the context manager closes it before
    # json2csv reads it back.
    with open(path, 'wb') as output:
        output.write(payload)
    return 1


if __name__ == "__main__":
    path = "tencent.json"
    tencent(path)
    json_exists = os.path.isfile(path)
    print(json_exists)
    if json_exists:
        json2csv.run(path)

把json导入csv

# coding: utf-8
"""Flatten a JSON array of (possibly nested) objects into a CSV file.

Nested dictionary keys are joined with '.' to form column names, and every
top-level element of the array becomes one CSV row.
"""
import csv
import io
import json
import sys

# Encoding of the generated CSV file.
CSV_ENCODING = 'gbk'

_PY2 = sys.version_info[0] == 2

if _PY2:
    # Original hack, kept for Python 2 only: switching the interpreter-wide
    # default encoding lets the csv module emit the scraped Chinese text
    # instead of failing on it. NOTE(review): this affects the whole process,
    # including whichever script imports this module.
    reload(sys)  # noqa: F821 - builtin on Python 2 only
    sys.setdefaultencoding(CSV_ENCODING)

# Working state shared by loop_data() and json_to_csv():
#   json_ob maps column name -> {row index: cell value}
#   c_line  is the index of the row currently being flattened
json_ob = {}
c_line = 0


def loop_data(o, k=''):
    """Recursively record the leaf values of *o* in the global ``json_ob``.

    Args:
        o: A decoded JSON value (dict, list or scalar).
        k: Dotted column-name prefix accumulated so far ('' at the top level).

    Dict keys extend the prefix with '.'; list elements reuse the prefix
    unchanged, so for a list of scalars only the last element survives for
    the current row. Scalars are stored under ``json_ob[k][c_line]``.
    """
    if isinstance(o, dict):
        for key, value in o.items():
            loop_data(value, key if k == '' else k + '.' + key)
    elif isinstance(o, list):
        for element in o:
            loop_data(element, k)
    else:
        json_ob.setdefault(k, {})[c_line] = o


def get_title_rows(json_ob, row_num=None):
    """Turn the column-oriented *json_ob* into a header list and row dicts.

    Args:
        json_ob: Mapping of column name -> {row index: cell value}.
        row_num: Number of rows to emit. Defaults to the highest row index
            present plus one, so sparse columns no longer truncate the
            output (the entry count of the longest column was used before).

    Returns:
        ``(title, rows)`` where ``title`` is the list of column names and
        ``rows`` is a list of dicts with '' for every missing cell.
    """
    title = list(json_ob)
    if row_num is None:
        highest = max([max(cells) for cells in json_ob.values() if cells] or [-1])
        row_num = highest + 1
    rows = [
        {column: cells.get(index, '') for column, cells in json_ob.items()}
        for index in range(row_num)
    ]
    return title, rows


def write_csv(title, rows, csv_file_name):
    """Write *rows* (dicts keyed by *title*) to *csv_file_name* with a header.

    The csv module must control line endings itself, which means binary mode
    on Python 2 and ``newline=''`` on Python 3; otherwise Windows output
    gains blank lines between rows.
    """
    if _PY2:
        csv_file = open(csv_file_name, 'wb')
    else:
        csv_file = open(csv_file_name, 'w', encoding=CSV_ENCODING, newline='')
    with csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=title)
        writer.writeheader()
        writer.writerows(rows)


def json_to_csv(object_list, csv_file_name='test.csv'):
    """Flatten every element of *object_list* and write the result as CSV.

    Args:
        object_list: Iterable of decoded JSON values, one per output row.
        csv_file_name: Destination file; defaults to the historical
            'test.csv'.
    """
    global json_ob, c_line
    json_ob = {}
    c_line = 0
    for element in object_list:
        loop_data(element)
        c_line += 1
    # Pass the real element count so every input element yields a row.
    title, rows = get_title_rows(json_ob, c_line)
    write_csv(title, rows, csv_file_name)


def run(path, csv_file_name='test.csv'):
    """Load the UTF-8 JSON file at *path* and convert it to *csv_file_name*."""
    with io.open(path, encoding='utf-8') as json_file:
        o = json.load(json_file)
    json_to_csv(o, csv_file_name)
原创粉丝点击