读EXCEL数据,通过百度NLP分析情感倾向,写入xlsx

来源:互联网 发布:frp需要域名吗 编辑:程序博客网 时间:2024/05/16 14:32
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 25 17:40:57 2017


@author: Administrator
"""


import pandas as pd
from aip import AipNlp


# Credentials for the Baidu NLP service, passed to AipNlp below.
# NOTE(review): these secrets are hard-coded in source; consider rotating them
# and loading them from configuration or the environment instead.
APP_ID = '10251280'
API_KEY = 'd2sWjj6w9pNMKUHFIabPqIiA'
# Keep this literal free of surrounding whitespace: it is handed to AipNlp
# exactly as written.
SECRET_KEY = 'fGuEjHqGxHP5EdtTgGgD70QG0Gh9j8Ur'
# Shared AipNlp client used by analysis_news().
aipNlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)
# Module-level accumulators: analysis_news() appends to these lists, so the
# scores gathered before a failure are still available to the __main__ block.
posi, nega = [], []




        
def get_data(path='jinrongjietable.xlsx', column='content'):
    """Load one column of text from an Excel workbook.

    Args:
        path: Workbook to read. Defaults to 'jinrongjietable.xlsx'.
        column: Name of the column to return. Defaults to 'content'.

    Returns:
        The selected column of the loaded DataFrame (one entry per row).

    Raises:
        KeyError: If the workbook has no column named ``column``.
    """
    # No ``encoding`` argument: current pandas versions of read_excel reject
    # it with a TypeError.
    df = pd.read_excel(path)
    return df[column]


def analysis_news(news):
    """Score each text with the sentiment API and collect the probabilities.

    Every text is sent to ``aipNlp.sentimentClassify``; the positive and
    negative probabilities of the first returned item are appended to the
    module-level ``posi`` and ``nega`` lists. Because those lists live at
    module level, scores collected before an exception are not lost, and
    repeated calls keep extending the same lists.

    Args:
        news: Iterable of text strings, e.g. the column returned by
            get_data().

    Returns:
        The ``(posi, nega)`` lists after all texts have been processed.

    Raises:
        KeyError: If a response lacks 'items' or one of the probability keys.
        AttributeError: If an entry has no ``replace`` method (for example a
            missing cell that is not a string).
    """
    # Iterate over the values rather than indexing with range(len(news)):
    # news[i] on a Series is a label lookup and fails when the index is not
    # the default 0..n-1.
    for text in news:
        # Replace U+00A0 (no-break space) with a plain space; the first run
        # failed with a 'gbk' codec encode error on that character.
        content = text.replace(u'\xa0', u' ')

        result = aipNlp.sentimentClassify(content)
        first_item = result['items'][0]
        # Read both values before appending so the two lists never end up
        # with different lengths if a key is missing.
        negative_prob = first_item['negative_prob']
        positive_prob = first_item['positive_prob']
        posi.append(positive_prob)
        nega.append(negative_prob)
        # Progress indicator: number of texts scored so far.
        print(len(posi))
    return posi, nega


def write_excel(posi, nega):
    """Write the positive and negative scores to separate Excel workbooks.

    Args:
        posi: Sequence of positive probabilities; saved to 'posi.xlsx'.
        nega: Sequence of negative probabilities; saved to 'nega.xlsx'.
    """
    # No ``encoding`` argument: DataFrame.to_excel no longer accepts it in
    # current pandas, and both files are now written the same way.
    pd.DataFrame(posi).to_excel('posi.xlsx')
    pd.DataFrame(nega).to_excel('nega.xlsx')
    
    
if __name__ == "__main__":
    news = get_data()
    try:
        posi, nega = analysis_news(news)
    except Exception as e:
        # Best-effort: report the failure and still save whatever was scored.
        # analysis_news() appends to the module-level posi/nega lists, so they
        # hold the partial results at this point.
        print(e)

    write_excel(posi, nega)



















原创粉丝点击