One-hot encoding 数据处理

来源:互联网 发布:ipv6跟ipv4无网络权限 编辑:程序博客网 时间:2024/06/05 07:42

import csv
import os
import shutil
import codecs
import pandas as pd
import numpy as np

from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import MultiLabelBinarizer

# Absolute path of the input CSV file (the name is kept for compatibility,
# although it points at a file rather than a directory).  Raw strings are
# required: in a normal literal "\U" starts a unicode escape and is a
# SyntaxError in Python 3.
dir_name = r'C:\Users\Thuang6\Desktop\MaxWellData\OneHot\csv_to_csv.csv'

# Directory the script switches into before reading the data.
WORK_DIR = r'C:\Users\Thuang6\Desktop\MaxWellData\OneHot'

# Column names assigned to the CSV when it is read.
COLUMN_NAMES = ['Time', 'Process', 'Component', 'Operation',
                'Action', 'Control', 'Category', 'Context']

# Columns that are binarized, in the order they are stacked side by side.
ENCODED_COLUMNS = ['Process', 'Component', 'Operation',
                   'Action', 'Control', 'Category']

# Number of equal row groups the encoded matrix is split into.
NUM_GROUPS = 21


def encode_columns(df, columns):
    """Binarize each of *columns* and stack the results horizontally.

    Every column is fitted with its own ``LabelBinarizer``; each encoded
    block is printed as it is produced, then all blocks are joined with
    ``np.hstack`` in the order given.

    Args:
        df: DataFrame holding the columns to encode.
        columns: Column names to encode, in output order.

    Returns:
        A 2-D numpy array with one row per DataFrame row.
    """
    encoded_blocks = []
    for column in columns:
        block = LabelBinarizer().fit_transform(df[column])
        print(block)
        encoded_blocks.append(block)
    return np.hstack(encoded_blocks)


def flatten_groups(chunks):
    """Flatten every array in *chunks* into a plain Python list.

    Args:
        chunks: Iterable of numpy arrays (e.g. the result of ``np.vsplit``).

    Returns:
        A list with one flat list of values per input array.
    """
    return [np.ndarray.flatten(chunk).tolist() for chunk in chunks]


def main():
    """Read the CSV, encode the categorical columns and print the results."""
    # os.chdir returns None, so its result is not worth keeping.
    os.chdir(WORK_DIR)

    df = pd.read_csv(dir_name, names=COLUMN_NAMES, index_col=False)
    # Missing cells become the literal category 'NULL' so they can be encoded.
    df = df.fillna(value='NULL')

    final_output = encode_columns(df, ENCODED_COLUMNS)
    print(final_output)

    # np.vsplit raises ValueError unless the row count divides evenly
    # by NUM_GROUPS.
    final_split = np.vsplit(final_output, NUM_GROUPS)
    print(final_split)
    print(np.shape(final_split))

    print("nihao")
    d = flatten_groups(final_split)

    # NOTE(review): the original indentation was lost; this print is assumed
    # to run once after the loop, showing the last group flattened.
    print(np.ndarray.flatten(final_split[-1]))
    print(d)


if __name__ == "__main__":
    main()

原创粉丝点击