翻译+文本处理

来源:互联网 发布:好的自考软件 编辑:程序博客网 时间:2024/05/17 01:36

# Lookup table: language display name (Chinese) -> language code.
# NOTE(review): codes such as "iw" and "zh-CN" do not look like the codes
# Baidu's API documents (e.g. "zh", "jp") — confirm before feeding these
# values to translateBaidu.
lang_dict = {
    "阿尔巴尼亚语": "sq",
    "阿拉伯语": "ar",
    "阿塞拜疆语": "az",
    "爱尔兰语": "ga",
    "爱沙尼亚语": "et",
    "白俄罗斯语": "be",
    "保加利亚语": "bg",
    "冰岛语": "is",
    "波兰语": "pl",
    "波斯语": "fa",
    "布尔语(南非荷兰语)": "af",
    "丹麦语": "da",
    "德语": "de",
    "俄语": "ru",
    "法语": "fr",
    "菲律宾语": "tl",
    "芬兰语": "fi",
    "格鲁吉亚语": "ka",
    "古吉拉特语": "gu",
    "海地克里奥尔语": "ht",
    "韩语": "ko",
    "荷兰语": "nl",
    "加利西亚语": "gl",
    "加泰罗尼亚语": "ca",
    "捷克语": "cs",
    "克罗地亚语": "hr",
    "拉丁语": "la",
    "拉脱维亚语": "lv",
    "立陶宛语": "lt",
    "罗马尼亚语": "ro",
    "马耳他语": "mt",
    "马来语": "ms",
    "马其顿语": "mk",
    "孟加拉语": "bn",
    "挪威语": "no",
    "葡萄牙语": "pt",
    "日语": "ja",
    "瑞典语": "sv",
    "塞尔维亚语": "sr",
    "斯洛伐克语": "sk",
    "斯洛文尼亚语": "sl",
    "斯瓦希里语": "sw",
    "泰语": "th",
    "土耳其语": "tr",
    "威尔士语": "cy",
    "乌克兰语": "uk",
    "希伯来语": "iw",
    "希腊语": "el",
    "西班牙的巴斯克语": "eu",
    "西班牙语": "es",
    "匈牙利语": "hu",
    "亚美尼亚语": "hy",
    "意大利语": "it",
    "意地绪语": "yi",
    "印地语": "hi",
    "印度的卡纳达语": "kn",
    "印度的泰卢固语": "te",
    "印度的泰米尔语": "ta",
    "印度的乌尔都语": "ur",
    "印尼语": "id",
    "英语": "en",
    "越南语": "vi",
    "中文(简体)": "zh-CN",
    "中文(繁体)": "zh-TW",
}

from openpyxl import load_workbook
import re
from urllib import request
import hashlib
import urllib
import random
import json
import time
import os
# Baidu Translate API credentials and endpoint.
# SECURITY: the app id and secret key were hard-coded here. They can now be
# supplied through the BAIDU_APPID / BAIDU_SECRET_KEY environment variables;
# the literals remain only as fallbacks so existing runs keep working.
# TODO: rotate the published key and drop the fallbacks.
appid = os.environ.get('BAIDU_APPID') or '20170307000041649'
secretKey = os.environ.get('BAIDU_SECRET_KEY') or 'JcXq9a9QwvxN2l6AhIqH'
url_baidu = 'http://api.fanyi.baidu.com/api/trans/vip/translate'
def translateBaidu(text, f='ja', t='en'):
    """Translate ``text`` through the Baidu Translate HTTP API.

    Args:
        text: Text to translate. ``None`` or an empty string (e.g. a blank
            spreadsheet cell) yields ``''`` without contacting the service.
        f: Source language code, sent as the ``from`` query parameter.
        t: Target language code, sent as the ``to`` query parameter.

    Returns:
        The translated text. If the response carries several
        ``trans_result`` entries, their ``dst`` values are joined with
        newlines (previously only the first entry was returned and the rest
        was silently dropped).

    Raises:
        KeyError: The response has no ``trans_result``; the message includes
            the ``error_code`` / ``error_msg`` fields of the response when
            they are present.

    NOTE(review): Baidu's documentation lists ``jp`` for Japanese — confirm
    that the default ``f='ja'`` is accepted by the service.
    """
    if not text:
        # Nothing to translate; avoid a request that can only fail.
        return ''

    salt = str(random.randint(32768, 65536))
    # The signature is the MD5 of appid + raw (unquoted) query + salt + key.
    sign = hashlib.md5(
        (appid + text + salt + secretKey).encode('utf-8')
    ).hexdigest()

    # Let urlencode escape every parameter instead of hand-concatenating.
    query = urllib.parse.urlencode(
        {
            'appid': appid,
            'q': text,
            'from': f,
            'to': t,
            'salt': salt,
            'sign': sign,
        },
        quote_via=urllib.parse.quote,
    )

    # The context manager closes the HTTP response even if decoding fails.
    with request.urlopen(url_baidu + '?' + query) as response:
        data = json.loads(response.read().decode('utf-8'))

    if 'trans_result' not in data:
        # Keep the exception type the old code produced, but say why.
        raise KeyError(
            'trans_result missing from Baidu response (error_code=%s, error_msg=%s)'
            % (data.get('error_code'), data.get('error_msg'))
        )

    return '\n'.join(str(entry['dst']) for entry in data['trans_result'])
# Open the workbook and take its first worksheet.
wb = load_workbook(filename="hang3.xlsx")
# Workbook.get_sheet_names() / get_sheet_by_name() are deprecated openpyxl
# APIs; `sheetnames` and item access are the supported replacements.
sheets = wb.sheetnames
sheet_first = sheets[0]
ws = wb[sheet_first]

# Numeric form of column D for every row: the cell text with its last
# character stripped, parsed as a float.
# NOTE(review): only the disabled "fill column F" step consumed this list;
# it is kept so the script still fails on malformed column-D cells as before.
n = [float(row[3].value[:-1]) for row in ws.rows]

# Two earlier passes are disabled (the original script carried them as
# string literals):
#   1. for rows 4002..max_row, translate column E (ja -> en) with
#      translateBaidu into column G and log "E||translation" to yingyu.txt
#      (a commented-out line in that pass also copied n[i] into column F);
#   2. write "E||label" lines to yingyu.txt, with label 5/4/3/2/1 chosen by
#      column F being >=4, >=3, >=2, >=1, or lower.

# Split the English text in column G into positive / negative corpora by the
# rating in column F. yingyu.txt is still opened (and therefore truncated)
# exactly as before, although no active code writes to it.
with open("ert-polarity.pos", "w", encoding="utf-8") as fpos, \
        open("ert-polarity.neg", "w", encoding="utf-8") as fneg, \
        open("yingyu.txt", "w", encoding="utf-8") as fout:
    for row_idx in range(1, ws.max_row + 1):
        rating = ws.cell(row=row_idx, column=6).value   # column F
        english = ws.cell(row=row_idx, column=7).value  # column G
        if rating is None or english is None:
            # A blank rating used to crash the comparison; skip such rows.
            continue
        if rating > 3.9:
            fpos.write(english + "\n")
        elif rating < 2.9:
            fneg.write(english + "\n")
        # Ratings in [2.9, 3.9] are treated as neutral and not exported.

# Read values back by coordinate: 'A1' means column A, row 1.
# ws.cell('A1') is not accepted by the current cell(row, column) signature,
# so item access is used instead.
print(ws['A1'].value)
print(ws.cell(row=1, column=6).value)
wb.save("hang3.xlsx")

原创粉丝点击