python3.5 打开csv文件中文乱码问题

来源:互联网 发布:查询淘宝关键字搜索量 编辑:程序博客网 时间:2024/06/10 08:47

将编码格式改成gb18030就ok了

# -*- coding: utf-8 -*-
"""Scrape a Wikipedia comparison table and save it as a CSV file.

The page at ``wiki`` is downloaded, the first table whose class attribute is
"sortable wikitable" is located, and every row with exactly six cells is
printed and written to ``url2table.csv``.

The output file is encoded as gb18030 so that the Chinese text is not garbled
when the CSV file is opened afterwards.

Created on Wed Aug  2 16:56:32 2017

@author: lengxia
"""
import urllib.request
import re
import csv
import codecs
import sys
from bs4 import BeautifulSoup
import importlib

wiki = 'https://zh.wikipedia.org/wiki/%E6%96%87%E4%BB%B6%E7%BC%96%E8%BE%91%E5%99%A8%E6%AF%94%E8%BE%83'
header = {'User-Agent': 'Mozilla/5.0'}

# Matches both header cells (<th>) and data cells (<td>).
td_th = re.compile('t[dh]')

# Columns of the table, in order:
#   name, creator/owner, first public release date,
#   latest stable version, price, licence terms
EXPECTED_COLUMNS = 6


def fetch_page(url, headers):
    """Download *url* with the given request headers and return it as text.

    The body is decoded as UTF-8.
    """
    request = urllib.request.Request(url, headers=headers)
    with urllib.request.urlopen(request) as response:
        return response.read().decode("utf8")


def extract_rows(table):
    """Yield the first text node of each cell for every six-cell row.

    Rows with a different number of cells, or whose first cell has no text,
    are skipped. A cell without any text contributes ``None``.
    """
    for row in table.find_all("tr"):
        cells = row.find_all(td_th)
        if len(cells) != EXPECTED_COLUMNS:
            continue
        values = [cell.find(text=True) for cell in cells]
        if not values[0]:
            continue
        yield values


def main():
    """Fetch the page, then print and save the table rows as CSV."""
    soup = BeautifulSoup(fetch_page(wiki, header), "lxml")
    table = soup.find("table", {"class": "sortable wikitable"})
    if table is None:
        raise RuntimeError(
            'no table with class "sortable wikitable" found at ' + wiki)

    # newline='' lets the csv module control line endings itself; without it
    # an extra blank line appears after every row on Windows.
    with open('url2table.csv', 'w', encoding="gb18030", newline='') as f:
        csv_writer = csv.writer(f)
        for values in extract_rows(table):
            print(values)
            csv_writer.writerow(values)


if __name__ == "__main__":
    main()
原创粉丝点击