将 CSV 格式的数据转换为 libsvm 格式

来源:互联网 发布:360数据恢复手机版 编辑:程序博客网 时间:2024/05/19 02:45

函数需要传入一个 pandas 的 DataFrame(第一列为标签,其余各列为特征),并将其转换为 libsvm 格式的数据写入指定文件;取值为 0 的特征不会写入输出。
代码如下:

# -*- coding: utf-8 -*-
"""Convert a CSV file (loaded as a pandas.DataFrame) to libsvm text format.

Created on Sat May 13 21:50:03 2017

@author: Administrator
"""
import time

import pandas as pd


def _format_value(value):
    """Render one numeric cell as text without dropping its fractional part.

    Integral values (``2``, ``2.0``) are written as ``2``; anything else is
    written with ``repr`` of the float so no precision is lost.  NaN raises
    ``ValueError`` here, so callers must fill missing values beforehand.
    """
    truncated = int(value)
    if truncated == value:
        return str(truncated)
    return repr(float(value))


def df2ffm(df, fp):
    """Write ``df`` to ``fp`` in libsvm format (``label index:value ...``).

    Despite the historical name, the output is plain libsvm, not libffm:
    no field ids are emitted.

    Args:
        df: pandas.DataFrame whose first column is the label (converted with
            ``int``) and whose remaining columns are numeric features.
            Missing values must already be filled.
        fp: path of the text file to create or overwrite.

    Each data row becomes one output line.  Feature indices are the column
    positions, so they start at 1 (column 0 is the label).  Features whose
    value equals 0 are omitted to keep the file sparse.
    """
    now = time.time()
    print('Format Converting begin in time:...', now)

    with open(fp, 'w') as f:
        for row in df.values:
            parts = [str(int(row[0]))]
            # The value is formatted by a helper rather than with "%d",
            # which would silently truncate 0.5 to 0.
            parts.extend(
                "%d:%s" % (position, _format_value(value))
                for position, value in enumerate(row[1:], start=1)
                if value != 0
            )
            f.write(" ".join(parts) + '\n')

    print('finish convert,the cost time is ', time.time() - now)
    print('[Done]')
    print()


def main(input_path=r'E:\tencent\input\train1.csv',
         output_path=r'E:\tencent\output\train.csv'):
    """Read ``input_path`` as CSV, zero-fill gaps and write libsvm output.

    Args:
        input_path: CSV file to load; defaults to the original author's path.
        output_path: destination file for the libsvm-formatted data.
    """
    df = pd.read_csv(input_path)
    # Missing cells become 0 and are therefore skipped in the sparse output.
    df = df.fillna(0)
    df2ffm(df, output_path)


if __name__ == '__main__':
    main()
0 0
原创粉丝点击