Logistic Regression
来源:互联网 发布:centos 6.5 硬盘分区 编辑:程序博客网 时间:2024/06/03 22:55
In [1]:
# Load the UCI glass identification dataset.
# NOTE: DataFrame.sort() was removed in pandas 0.20 — use sort_values() instead.
import pandas as pd

url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/glass/glass.data'
col_names = ['id', 'ri', 'na', 'mg', 'al', 'si', 'k', 'ca', 'ba', 'fe', 'glass_type']
glass = pd.read_csv(url, names=col_names, index_col='id')
glass = glass.sort_values('al')  # avoid inplace=True: keeps the cell idempotent on re-run
glass.head()
Out[1]:
In [2]:
import seaborn as snsimport matplotlib.pyplot as plt%matplotlib inlinesns.set(font_scale=1.5)
In [3]:
# Seaborn scatter of refractive index vs. aluminum with a fitted regression line.
sns.lmplot(data=glass, x='al', y='ri', ci=None)
Out[3]:
In [4]:
# Same scatter via the pandas plotting API.
glass.plot(x='al', y='ri', kind='scatter')
Out[4]:
In [5]:
# Equivalent scatter plot built directly with matplotlib.
plt.scatter(glass['al'], glass['ri'])
plt.xlabel('al')
plt.ylabel('ri')
Out[5]:
In [6]:
# Fit a simple linear regression: ri ~ al.
from sklearn.linear_model import LinearRegression

feature_cols = ['al']
X = glass[feature_cols]
y = glass['ri']

linreg = LinearRegression()
linreg.fit(X, y)
Out[6]:
In [7]:
# Store the model's fitted values for every row of the dataset.
glass['ri_pred'] = linreg.predict(X)
glass.head()
Out[7]:
In [8]:
# Draw the fitted values connected as a line.
plt.plot(glass['al'], glass['ri_pred'], color='red')
plt.xlabel('al')
plt.ylabel('Predicted ri')
Out[8]:
In [9]:
# Overlay the regression line on the raw scatter.
plt.scatter(glass['al'], glass['ri'])
plt.plot(glass['al'], glass['ri_pred'], color='red')
plt.xlabel('al')
plt.ylabel('ri')
Out[9]:
In [10]:
# Manual prediction for al=2: slope * 2 plus the intercept.
linreg.coef_ * 2 + linreg.intercept_
Out[10]:
In [11]:
# Prediction for al=2 via the predict method.
# NOTE: passing a bare scalar was removed in modern scikit-learn —
# predict() requires a 2-D input of shape (n_samples, n_features).
linreg.predict(pd.DataFrame({'al': [2]}))
Out[11]:
In [12]:
# Pair each feature name with its coefficient.
# NOTE: in Python 3, zip() returns a lazy iterator, so wrap it in list()
# so the pairs are actually displayed.
list(zip(feature_cols, linreg.coef_))
Out[12]:
In [13]:
# Increasing al by 1 (to al=3) lowers the predicted ri by the slope (~0.0025).
slope_magnitude = 0.0024776063874696243
1.51699012 - slope_magnitude
Out[13]:
In [14]:
# Prediction for al=3 via the predict method.
# NOTE: modern scikit-learn rejects bare scalars — input must be 2-D.
linreg.predict(pd.DataFrame({'al': [3]}))
Out[14]:
In [15]:
# Class balance of the raw glass_type label.
glass['glass_type'].value_counts().sort_index()
Out[15]:
In [16]:
# Binary target: window glass (types 1-3) -> 0, household glass (types 5-7) -> 1.
# Note type 4 is absent from this dataset.
glass['household'] = glass['glass_type'].map({1: 0, 2: 0, 3: 0, 5: 1, 6: 1, 7: 1})
glass.head()
Out[16]:
In [17]:
# Raw relationship between aluminum content and the binary target.
plt.scatter(glass['al'], glass['household'])
plt.xlabel('al')
plt.ylabel('household')
Out[17]:
In [18]:
# Fit linear regression to the 0/1 target and keep its fitted values.
feature_cols = ['al']
X = glass[feature_cols]
y = glass['household']

linreg.fit(X, y)
glass['household_pred'] = linreg.predict(X)
In [19]:
# Scatter plus the (inappropriate for a 0/1 target) straight regression line.
plt.scatter(glass['al'], glass['household'])
plt.plot(glass['al'], glass['household_pred'], color='red')
plt.xlabel('al')
plt.ylabel('household')
Out[19]:
In [20]:
# np.where(cond, a, b) picks a where cond is True and b where it is False.
import numpy as np

nums = np.array([5, 15, 8])
np.where(nums > 10, 'big', 'small')
Out[20]:
In [21]:
# Threshold the fitted values at 0.5 to get hard 0/1 class predictions.
glass['household_pred_class'] = np.where(glass['household_pred'] >= 0.5, 1, 0)
glass.head()
Out[21]:
In [22]:
# Visualize the thresholded class predictions.
plt.scatter(glass['al'], glass['household'])
plt.plot(glass['al'], glass['household_pred_class'], color='red')
plt.xlabel('al')
plt.ylabel('household')
Out[22]:
In [23]:
# Fit logistic regression; a huge C effectively disables regularization.
from sklearn.linear_model import LogisticRegression

feature_cols = ['al']
X = glass[feature_cols]
y = glass['household']

logreg = LogisticRegression(C=1e9)
logreg.fit(X, y)
glass['household_pred_class'] = logreg.predict(X)
In [24]:
# Class predictions from the logistic model.
plt.scatter(glass['al'], glass['household'])
plt.plot(glass['al'], glass['household_pred_class'], color='red')
plt.xlabel('al')
plt.ylabel('household')
Out[24]:
In [25]:
# Column 1 of predict_proba holds P(household = 1).
glass['household_pred_prob'] = logreg.predict_proba(X)[:, 1]
In [26]:
# The S-shaped predicted probability curve over the raw scatter.
plt.scatter(glass['al'], glass['household'])
plt.plot(glass['al'], glass['household_pred_prob'], color='red')
plt.xlabel('al')
plt.ylabel('household')
Out[26]:
In [27]:
# Predicted probabilities for al = 1, 2, 3.
# NOTE: rewritten from Python 2 print statements; also, modern scikit-learn
# rejects bare scalars — predict_proba needs a 2-D input.
for al_value in (1, 2, 3):
    print(logreg.predict_proba(pd.DataFrame({'al': [al_value]})))
In [28]:
# Probability vs. odds: odds = p / (1 - p).
probabilities = [0.1, 0.2, 0.25, 0.5, 0.6, 0.8, 0.9]
table = pd.DataFrame({'probability': probabilities})
table['odds'] = table['probability'] / (1 - table['probability'])
table
Out[28]:
In [29]:
# Euler's number: e raised to the first power.
np.exp(1)
Out[29]:
In [30]:
# Natural log: time to grow 1 unit into 2.718 units at 100% continuous growth.
np.log(2.718)
Out[30]:
In [31]:
# log and exp are inverses, so this returns 5.
np.log(np.exp(5))
Out[31]:
In [32]:
# Log-odds (the logit): the scale on which logistic regression is linear.
table['logodds'] = np.log(table['odds'])
table
Out[32]:
In [33]:
# Revisit the predicted probability curve.
plt.scatter(glass['al'], glass['household'])
plt.plot(glass['al'], glass['household_pred_prob'], color='red')
plt.xlabel('al')
plt.ylabel('household')
Out[33]:
In [34]:
# Predicted log-odds at al=2 from the fitted coefficients.
logodds = logreg.intercept_ + 2 * logreg.coef_[0]
logodds
Out[34]:
In [35]:
# Exponentiate to move from log-odds back to odds.
odds = np.exp(logodds)
odds
Out[35]:
In [36]:
# Odds to probability: p = odds / (1 + odds).
prob = odds / (1 + odds)
prob
Out[36]:
In [37]:
# Same probability for al=2 via predict_proba.
# NOTE: modern scikit-learn rejects bare scalars — input must be 2-D.
logreg.predict_proba(pd.DataFrame({'al': [2]}))[:, 1]
Out[37]:
In [38]:
# Feature/coefficient pairs; list() materializes Python 3's lazy zip iterator
# so the pairs are displayed instead of a bare zip object.
list(zip(feature_cols, logreg.coef_[0]))
Out[38]:
In [39]:
# Raising al by 1 (to al=3) adds the coefficient (~4.18) to the log-odds;
# convert back through odds to a probability.
logodds = 0.64722323 + 4.1804038614510901
odds = np.exp(logodds)
prob = odds / (1 + odds)
prob
Out[39]:
In [40]:
# Probability for al=3 via predict_proba.
# NOTE: modern scikit-learn rejects bare scalars — input must be 2-D.
logreg.predict_proba(pd.DataFrame({'al': [3]}))[:, 1]
Out[40]:
In [41]:
# examine the interceptlogreg.intercept_
Out[41]:
In [42]:
# Probability implied by the intercept alone (i.e. at al = 0).
logodds = logreg.intercept_
odds = np.exp(logodds)
prob = odds / (1 + odds)
prob
Out[42]:
In [43]:
# Binary indicator for high barium content.
glass['high_ba'] = np.where(glass['ba'] > 0.5, 1, 0)
In [44]:
# Logistic fit on the original continuous ba feature.
sns.lmplot(data=glass, x='ba', y='household', ci=None, logistic=True)
Out[44]:
In [45]:
# Logistic fit on the binarized feature.
sns.lmplot(data=glass, x='high_ba', y='household', ci=None, logistic=True)
Out[45]:
In [46]:
# Jitter makes the heavily overlapping 0/1 points visible.
sns.lmplot(data=glass, x='high_ba', y='household', ci=None, logistic=True,
           x_jitter=0.05, y_jitter=0.05)
Out[46]:
In [47]:
# Refit logistic regression using the categorical feature.
feature_cols = ['high_ba']
X = glass[feature_cols]
y = glass['household']
logreg.fit(X, y)
Out[47]:
In [48]:
# Coefficient for high_ba; list() is needed because Python 3's zip is lazy
# and would otherwise display only a zip object.
list(zip(feature_cols, logreg.coef_[0]))
Out[48]:
This web site does not host notebooks, it only renders notebooks available on other websites.
Delivered by Fastly, Rendered by Rackspace
nbviewer GitHub repository.
nbviewer version: b280bae
notebook version: 4.1.0
nbconvert version: 4.1.0
Rendered 30 minutes ago
0 0
- logistic regression
- logistic regression
- logistic regression
- Logistic Regression
- Logistic Regression
- logistic regression
- Logistic Regression
- Logistic Regression
- logistic regression
- Logistic regression
- Logistic Regression
- Logistic Regression
- Logistic Regression
- Logistic Regression
- Logistic Regression
- logistic regression
- logistic Regression
- Logistic regression
- Codeforces 479E Riding in a Lift (DP)
- 深入理解Java
- 欢迎使用CSDN-markdown编辑器
- 我发现的IT大牛的优秀博客
- 面试笔试杂项积累-leetcode 246-260
- Logistic Regression
- DP入门之数塔(2084)
- 如何向外行解释产品经理频繁更改需求为什么会令程序员烦恼?
- 文章标题
- 数据库入门
- python、nodejs和java性能对比
- Educational Codeforces Round 7 CF622C Not Equal on a Segment
- 数据库操作
- 数据库中约束与修改