评估红酒口感

来源:互联网 发布:dynamic duo知乎 编辑:程序博客网 时间:2024/04/19 16:46

评估红酒口感
问题描述:属性是红酒的化学成分的测量指标,标签是评酒员的评分。

# Load the UCI red-wine quality data set and show its first rows.
import pandas as pd
from pandas import DataFrame
from pylab import *
import matplotlib.pyplot as plt

# The URL is split across two adjacent string literals only to keep the
# source line short; Python concatenates them into a single string.
target_url = ("http://archive.ics.uci.edu/ml/machine-"
              "learning-databases/wine-quality/winequality-red.csv")

# The file uses ';' as the field separator and its first row holds the
# column names.
wine = pd.read_csv(target_url, header=0, sep=';')
print(wine.head())

输出:
fixed acidity volatile acidity citric acid residual sugar chlorides \
0 7.4 0.70 0.00 1.9 0.076
1 7.8 0.88 0.00 2.6 0.098
2 7.8 0.76 0.04 2.3 0.092
3 11.2 0.28 0.56 1.9 0.075
4 7.4 0.70 0.00 1.9 0.076

free sulfur dioxide total sulfur dioxide density pH sulphates \
0 11.0 34.0 0.9978 3.51 0.56
1 25.0 67.0 0.9968 3.20 0.68
2 15.0 54.0 0.9970 3.26 0.65
3 17.0 60.0 0.9980 3.16 0.58
4 11.0 34.0 0.9978 3.51 0.56

alcohol quality
0 9.4 5
1 9.8 5
2 9.8 5
3 9.8 6
4 9.4 5


# Per-column descriptive statistics (count, mean, std, min, quartiles, max)
# of the `wine` frame, kept in `summary` and printed for inspection.
summary = wine.describe()
print(summary)
  fixed acidity  volatile acidity  citric acid  residual sugar  \

count 1599.000000 1599.000000 1599.000000 1599.000000
mean 8.319637 0.527821 0.270976 2.538806
std 1.741096 0.179060 0.194801 1.409928
min 4.600000 0.120000 0.000000 0.900000
25% 7.100000 0.390000 0.090000 1.900000
50% 7.900000 0.520000 0.260000 2.200000
75% 9.200000 0.640000 0.420000 2.600000
max 15.900000 1.580000 1.000000 15.500000


# Standardize every column: subtract the column mean and divide by the
# column standard deviation taken from `summary` (the describe() table).
# The expression builds a NEW frame, so the raw `wine` data is left intact;
# a plain `wineNormalized = wine` would only alias the same object and the
# normalization would overwrite the original measurements.
wineNormalized = (wine - summary.loc["mean"]) / summary.loc["std"]

# Box plot of the standardized columns, one box per column position.
plt.boxplot(wineNormalized.values)
plt.xlabel('Attribute Index')
plt.ylabel('Quartile Ranges - Normalized ')
plt.show()
  • 平行坐标图
# Parallel-coordinates plots of the red-wine data: one line per wine,
# colored by its quality score, first on raw values and then on
# standardized values.
import pandas as pd
from pandas import DataFrame
from pylab import *
# NOTE: the next two imports must stay below the pylab star import so that
# the names `plot` and `exp` refer to matplotlib.pyplot and math.exp here.
import matplotlib.pyplot as plot
from math import exp

target_url = ("http://archive.ics.uci.edu/ml/machine-"
              "learning-databases/wine-quality/winequality-red.csv")
wine = pd.read_csv(target_url, header=0, sep=";")

# print column names in order to have the full versions
print(wine.columns)

# change column names to shorter ones to fit graph
wine.columns = ['fixAcid', 'volAcid', 'citAcid',
                'resSugr', 'chlor', 'frSO2', 'totSO2',
                'dens', 'pH', 'sulpha', 'alcohol', 'quality']

# generate statistical summaries
summary = wine.describe()

# The last column ('quality') is the label; everything before it is an
# attribute.
nDataCol = len(wine.columns) - 1
meanTaste = summary.loc["mean", "quality"]
sdTaste = summary.loc["std", "quality"]


def _plot_parallel_coordinates(attributes, normTargets):
    """Draw one line per row of *attributes*, colored by its label.

    attributes  -- DataFrame holding only the attribute columns.
    normTargets -- standardized label for each row, in the same row order.

    Each standardized label is squashed into (0, 1) with the logistic
    function and used to pick a color from the RdYlBu colormap.
    """
    for (_, dataRow), normTarget in zip(attributes.iterrows(), normTargets):
        labelColor = 1.0 / (1.0 + exp(-normTarget))
        # plot rows of data as if they were series data
        dataRow.plot(color=plot.cm.RdYlBu(labelColor), alpha=0.5)
    plot.xlabel("Attribute Index")
    plot.ylabel("Attribute Values")
    plot.show()


# First pass: raw attribute values. The slice starts at column 0 so that the
# first attribute ('fixAcid') is included in the plot.
_plot_parallel_coordinates(
    wine.iloc[:, :nDataCol],
    (wine.iloc[:, nDataCol] - meanTaste) / sdTaste,
)

# Standardize every column (zero mean, unit standard deviation) into a NEW
# frame so the raw `wine` data is not overwritten.
wineNormalized = (wine - summary.loc["mean"]) / summary.loc["std"]

# Try again with normalized values
_plot_parallel_coordinates(
    wineNormalized.iloc[:, :nDataCol],
    wineNormalized.iloc[:, nDataCol],
)