Python Machine Learning 1 普通最小二乘法

来源：互联网发布：淘宝上的代练编辑：程序博客网时间：2024/06/18 03:45

sklearn实现的监督学习算法有：OLS, Ridge Regression, Lasso (multi-task Lasso), Elastic Net (multi-task Elastic Net), Logistic Regression, Bayesian Regression, Perceptron, Robust Regression, Support Vector Machine, Naive Bayes, Decision Tree, Multilabel Classification

1.1普通最小二乘

LinearRegression 拟合一个带有系数 $w = (w_1, ..., w_p)$ 的线性模型，以最小化数据集中观察到的响应与通过线性逼近预测的响应之间的残差平方和。在数学上，它求解如下形式的问题： $\min_{w} \|Xw - y\|_2^2$

LinearRegression 的 fit 方法接收数组 X、y，并将线性模型的系数 $w$ 存储在其 coef_ 成员中：

>>> from sklearn import linear_model
>>> reg = linear_model.LinearRegression()
>>> reg.fit([[0, 0], [1, 1], [2, 2]], [0, 1, 2])
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)
>>> reg.coef_
array([ 0.5,  0.5])

普通最小二乘法的系数估计依赖于模型各项的相互独立性。当各项相关、且设计矩阵 $X$ 的列之间存在近似线性相关关系时，设计矩阵会接近奇异，因此最小二乘估计对观测响应中的随机误差高度敏感，产生很大的方差。例如，当数据未经实验设计而直接收集时，就可能出现这种多重共线性的情况。

核心代码：

class LinearRegression(LinearModel, RegressorMixin):
    """
    Ordinary least squares Linear Regression.

    Parameters
    ----------
    fit_intercept : boolean, optional, default True
        whether to calculate the intercept for this model.

    normalize : boolean, optional, default False
        If True, the regressors X will be normalized before regression by
        subtracting the mean and dividing by the l2-norm.
        If you wish to standardize, please use
        :class:`sklearn.preprocessing.StandardScaler` before calling ``fit``
        on an estimator with ``normalize=False``.

    copy_X : boolean, optional, default True
        Forwarded as ``copy`` to the data preprocessing step in ``fit``.

    n_jobs : int, optional, default 1
        The number of jobs to use for the computation.
        If -1 all CPUs are used. This will only provide speedup for
        n_targets > 1 and sufficient large problems.

    Attributes
    ----------
    coef_ : array, shape (n_features, ) or (n_targets, n_features)
        Estimated coefficients for the linear regression problem.
        If multiple targets are passed during the fit (y 2D), this
        is a 2D array of shape (n_targets, n_features), while if only
        one target is passed, this is a 1D array of length n_features.

    intercept_ : array
        Independent term in the linear model.

    Notes
    -----
    From the implementation point of view, this is just plain Ordinary
    Least Squares (scipy.linalg.lstsq) wrapped as a predictor object.
    """

    def __init__(self, fit_intercept=True, normalize=False, copy_X=True,
                 n_jobs=1):
        self.fit_intercept = fit_intercept
        self.normalize = normalize
        self.copy_X = copy_X
        self.n_jobs = n_jobs

    def fit(self, X, y, sample_weight=None):
        """
        Fit linear model.

        Parameters
        ----------
        X : numpy array or sparse matrix of shape [n_samples,n_features]
            Training data

        y : numpy array of shape [n_samples, n_targets]
            Target values

        sample_weight : numpy array of shape [n_samples]
            Individual weights for each sample

        Returns
        -------
        self : returns an instance of self.

        Raises
        ------
        ValueError
            If ``sample_weight`` has more than one dimension.
        """
        n_jobs_ = self.n_jobs
        X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],
                         y_numeric=True, multi_output=True)

        if sample_weight is not None and np.atleast_1d(sample_weight).ndim > 1:
            raise ValueError("Sample weights must be 1D array or scalar")

        X, y, X_offset, y_offset, X_scale = self._preprocess_data(
            X, y, fit_intercept=self.fit_intercept, normalize=self.normalize,
            copy=self.copy_X, sample_weight=sample_weight)

        if sample_weight is not None:
            # Sample weight can be implemented via a simple rescaling.
            X, y = _rescale_data(X, y, sample_weight)

        if sp.issparse(X):
            if y.ndim < 2:
                out = sparse_lsqr(X, y)
                self.coef_ = out[0]
                self._residues = out[3]
            else:
                # sparse_lstsq cannot handle y with shape (M, K), so each
                # target column is solved separately (optionally in parallel).
                outs = Parallel(n_jobs=n_jobs_)(
                    delayed(sparse_lsqr)(X, y[:, j].ravel())
                    for j in range(y.shape[1]))
                # np.vstack needs a real sequence: generator input was
                # deprecated in NumPy 1.16 and is rejected by newer releases.
                self.coef_ = np.vstack([out[0] for out in outs])
                self._residues = np.vstack([out[3] for out in outs])
        else:
            self.coef_, self._residues, self.rank_, self.singular_ = \
                linalg.lstsq(X, y)
            # lstsq returns the solution with features on the first axis;
            # transpose so targets (if any) come first.
            self.coef_ = self.coef_.T

        if y.ndim == 1:
            self.coef_ = np.ravel(self.coef_)
        self._set_intercept(X_offset, y_offset, X_scale)
        return self

def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy):
    """Aux function used at beginning of fit in linear models.

    Runs ``_preprocess_data`` on ``X`` and ``y`` and then resolves the
    ``precompute`` / ``Xy`` pair so that they are consistent with the
    preprocessed data.

    Parameters
    ----------
    X : array or sparse matrix with a 2-tuple ``shape``
        Training data; ``X.shape`` is unpacked as (n_samples, n_features).
    y : array
        Target values, 1D or 2D (the second axis is read as n_targets).
    Xy : array or None
        User-supplied ``np.dot(X.T, y)``. Discarded whenever the Gram matrix
        is recomputed or ``precompute`` does not end up being an array.
    precompute : True | False | 'auto' | array
        Gram matrix policy. ``'auto'`` resolves to ``n_samples > n_features``;
        ``True`` makes this function compute ``np.dot(X.T, X)``; an array is
        used as the Gram matrix itself. Forced to ``False`` for sparse ``X``.
    normalize : boolean
        Forwarded to ``_preprocess_data``.
    fit_intercept : boolean
        Forwarded to ``_preprocess_data``.
    copy : boolean
        Forwarded to ``_preprocess_data`` for dense ``X`` only.

    Returns
    -------
    X, y, X_offset, y_offset, X_scale, precompute, Xy
        The preprocessed data, the offsets/scale reported by
        ``_preprocess_data``, and the resolved ``precompute`` and ``Xy``.
    """
    n_samples, n_features = X.shape

    if sparse.isspmatrix(X):
        # No Gram matrix is used for sparse input.
        precompute = False
        X, y, X_offset, y_offset, X_scale = _preprocess_data(
            X, y, fit_intercept=fit_intercept, normalize=normalize,
            return_mean=True)
    else:
        # copy was done in fit if necessary
        X, y, X_offset, y_offset, X_scale = _preprocess_data(
            X, y, fit_intercept=fit_intercept, normalize=normalize, copy=copy)

    # A user-supplied Gram matrix is stale if preprocessing actually shifted
    # or rescaled X (non-zero offsets / non-unit scales).
    if hasattr(precompute, '__array__') and (
            fit_intercept and not np.allclose(X_offset, np.zeros(n_features)) or
            normalize and not np.allclose(X_scale, np.ones(n_features))):
        warnings.warn("Gram matrix was provided but X was centered"
                      " to fit intercept, "
                      "or X was normalized : recomputing Gram matrix.",
                      UserWarning)
        # recompute Gram
        precompute = 'auto'
        Xy = None

    # precompute if n_samples > n_features
    if isinstance(precompute, six.string_types) and precompute == 'auto':
        precompute = (n_samples > n_features)

    if precompute is True:
        # make sure that the 'precompute' array is contiguous.
        precompute = np.empty(shape=(n_features, n_features), dtype=X.dtype,
                              order='C')
        np.dot(X.T, X, out=precompute)

    if not hasattr(precompute, '__array__'):
        Xy = None  # cannot use Xy if precompute is not Gram

    if hasattr(precompute, '__array__') and Xy is None:
        # np.find_common_type was deprecated in NumPy 1.25 and removed in
        # NumPy 2.0; np.result_type is the supported way to promote dtypes.
        common_dtype = np.result_type(X.dtype, y.dtype)
        if y.ndim == 1:
            # Xy is 1d, make sure it is contiguous.
            Xy = np.empty(shape=n_features, dtype=common_dtype, order='C')
            np.dot(X.T, y, out=Xy)
        else:
            # Make sure that Xy is always F contiguous even if X or y are not
            # contiguous: the goal is to make it fast to extract the data for a
            # specific target.
            n_targets = y.shape[1]
            Xy = np.empty(shape=(n_features, n_targets), dtype=common_dtype,
                          order='F')
            # Writing into the C-ordered transpose view fills the F-ordered
            # Xy buffer in place.
            np.dot(y.T, X, out=Xy.T)

    return X, y, X_offset, y_offset, X_scale, precompute, Xy

阅读全文

0 0