Python Machine Learning 1 普通最小二乘法

来源:互联网 发布:淘宝上的代练 编辑:程序博客网 时间:2024/06/18 03:45

sklearn实现的监督学习算法有:OLS,Ridge Regression,Lasso(multi-task lasso),Elastic net(multi-task elastic net),Logistic regression,Bayesian Regression,Perception,Robustness regression,support vector machine,naive bayes,decission tree,multilabel classification

1.1普通最小二乘

LinearRegression拟合具有系数的线性模型, w =(w_1,...,w_p)以最小化数据集中观察到的响应与通过线性逼近预测的响应之间的残差平方和。在数学上它解决了一个问题的形式:\ underset {w} {min \,} {||  X w  -  y || _2} ^ 2

LinearRegression将采用其fit方法数组X,y,并将w ^线性模型的系数存储在其 coef_成员中

from sklearn import linear_modelreg = linear_model.LinearRegression()reg.fit ([[0, 0], [1, 1], [2, 2]], [0, 1, 2])LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)reg.coef_array([ 0.5,  0.5])

普通最小二乘法的系数估计依赖于模型项的独立性。当相关项和设计矩阵的列X具有近似的线性相关性时,设计矩阵变得接近奇异,因此最小二乘估计对观测响应中的随机误差高度敏感,产生大的方差。例如,当没有实验设计收集数据时,就会出现这种多重共线性的情况

核心代码:

class LinearRegression(LinearModel, RegressorMixin):
    """
    Ordinary least squares Linear Regression.
    Parameters
    ----------
    fit_intercept : boolean, optional, default Truewhether to calculate the intercept for this model. 
    normalize : boolean, optional, default False
        If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm.
        If you wish to standardize, please use  :class:sklearn.preprocessing.StandardScaler` before calling ``fit`` on
        an estimator with ``normalize=False``.
    copy_X : boolean, optional, default True 
    n_jobs : int, optional, default 1
        The number of jobs to use for the computation.
        If -1 all CPUs are used. This will only provide speedup for
        n_targets > 1 and sufficient large problems.
    Attributes:
    ----------
    coef_ : array, shape (n_features, ) or (n_targets, n_features)
        Estimated coefficients for the linear regression problem.
        If multiple targets are passed during the fit (y 2D), this is a 2D array of shape (n_targets, n_features), while if only
        one target is passed, this is a 1D array of length n_features.
    intercept_ : array     Independent term in the linear model.
    From the implementation point of view, this is just plain Ordinary
    Least Squares (scipy.linalg.lstsq) wrapped as a predictor object.
    """
    def __init__(self, fit_intercept=True, normalize=False, copy_X=True,
                 n_jobs=1):
        self.fit_intercept = fit_intercept
        self.normalize = normalize
        self.copy_X = copy_X
        self.n_jobs = n_jobs
    def fit(self, X, y, sample_weight=None):
        """
        Fit linear model.
        Parameters
        ----------
        X : numpy array or sparse matrix of shape [n_samples,n_features]
            Training data
        y : numpy array of shape [n_samples, n_targets]
            Target values
        sample_weight : numpy array of shape [n_samples]
            Individual weights for each sample
            parameter *sample_weight* support to LinearRegression.
        """
        n_jobs_ = self.n_jobs
        X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],
                         y_numeric=True, multi_output=True)
        if sample_weight is not None and np.atleast_1d(sample_weight).ndim > 1:
            raise ValueError("Sample weights must be 1D array or scalar")
        X, y, X_offset, y_offset, X_scale = self._preprocess_data(
            X, y, fit_intercept=self.fit_intercept, normalize=self.normalize,
            copy=self.copy_X, sample_weight=sample_weight)
        if sample_weight is not None:
                                                                               # Sample weight can be implemented via a simple rescaling.
            X, y = _rescale_data(X, y, sample_weight)
        if sp.issparse(X):
            if y.ndim < 2:
                out = sparse_lsqr(X, y)
                self.coef_ = out[0]
                self._residues = out[3]
            else:
                # sparse_lstsq cannot handle y with shape (M, K)
                outs = Parallel(n_jobs=n_jobs_)(
                    delayed(sparse_lsqr)(X, y[:, j].ravel())
                    for j in range(y.shape[1]))
                self.coef_ = np.vstack(out[0] for out in outs)
                self._residues = np.vstack(out[3] for out in outs)
        else:
            self.coef_, self._residues, self.rank_, self.singular_ = \
                linalg.lstsq(X, y)
            self.coef_ = self.coef_.T
        if y.ndim == 1:
            self.coef_ = np.ravel(self.coef_)
        self._set_intercept(X_offset, y_offset, X_scale)
        return self

def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy):
    """Aux function used at beginning of fit in linear models"""
    n_samples, n_features = X.shape


    if sparse.isspmatrix(X):
        precompute = False
        X, y, X_offset, y_offset, X_scale = _preprocess_data(
            X, y, fit_intercept=fit_intercept, normalize=normalize,
            return_mean=True)
    else:
        # copy was done in fit if necessary
        X, y, X_offset, y_offset, X_scale = _preprocess_data(
            X, y, fit_intercept=fit_intercept, normalize=normalize, copy=copy)
    if hasattr(precompute, '__array__') and (
            fit_intercept and not np.allclose(X_offset, np.zeros(n_features)) or
            normalize and not np.allclose(X_scale, np.ones(n_features))):
        warnings.warn("Gram matrix was provided but X was centered"
                      " to fit intercept, "
                      "or X was normalized : recomputing Gram matrix.",
                      UserWarning)
        # recompute Gram
        precompute = 'auto'
        Xy = None


    # precompute if n_samples > n_features
    if isinstance(precompute, six.string_types) and precompute == 'auto':
        precompute = (n_samples > n_features)


    if precompute is True:
        # make sure that the 'precompute' array is contiguous.
        precompute = np.empty(shape=(n_features, n_features), dtype=X.dtype,
                              order='C')
        np.dot(X.T, X, out=precompute)


    if not hasattr(precompute, '__array__'):
        Xy = None  # cannot use Xy if precompute is not Gram


    if hasattr(precompute, '__array__') and Xy is None:
        common_dtype = np.find_common_type([X.dtype, y.dtype], [])
        if y.ndim == 1:
            # Xy is 1d, make sure it is contiguous.
            Xy = np.empty(shape=n_features, dtype=common_dtype, order='C')
            np.dot(X.T, y, out=Xy)
        else:
            # Make sure that Xy is always F contiguous even if X or y are not
            # contiguous: the goal is to make it fast to extract the data for a
            # specific target.
            n_targets = y.shape[1]
            Xy = np.empty(shape=(n_features, n_targets), dtype=common_dtype,
                          order='F')
            np.dot(y.T, X, out=Xy.T)
    return X, y, X_offset, y_offset, X_scale, precompute, Xy




原创粉丝点击
热门问题 老师的惩罚 人脸识别 我在镇武司摸鱼那些年 重生之率土为王 我在大康的咸鱼生活 盘龙之生命进化 天生仙种 凡人之先天五行 春回大明朝 姑娘不必设防,我是瞎子 9个月小孩不吃饭怎么办 孩子被老师打了怎么办 孩子说老师打她怎么办 孩子的数学太差怎么办 2岁宝宝老要喝水怎么办 分手了还想她怎么办 5岁宝宝不会说话怎么办 2岁半宝宝说话晚怎么办 7岁儿童发烧39度怎么办 感冒发烧怎么办简单的退烧方法 生完孩子没奶水怎么办 梦见让狐狸咬了怎么办 1岁宝宝不吃辅食怎么办 母乳不够宝宝不吃奶粉怎么办 吃母乳的宝宝不吃奶粉怎么办 宝宝吃母乳不吃奶粉怎么办 1岁婴儿不吃辅食怎么办 不喝奶瓶的宝宝怎么办 母乳不足宝宝不吃奶粉怎么办 4岁宝宝注意力不集中怎么办 孩子上课不专心听讲怎么办 小孩上课不认真听讲怎么办 一年级孩子上课不认真听讲怎么办 打了孩子很自责怎么办 4岁宝宝讲话结巴怎么办 网销客户不说话怎么办 两岁宝宝不说话怎么办? 4岁儿童说话结巴怎么办 3岁宝宝说话结巴怎么办 6岁儿童舌头短怎么办 5岁宝宝说话结巴怎么办 两岁宝宝说话晚怎么办 6岁儿童说话结巴怎么办 2岁宝宝呕吐拉稀怎么办 2岁宝宝突然呕吐怎么办 2岁宝宝呕吐发烧怎么办 2岁宝宝呕吐厉害怎么办 1岁宝宝半夜呕吐怎么办 2岁半宝宝呕吐怎么办 2岁宝宝半夜呕吐怎么办 两岁宝宝一直吐怎么办