8000 add standard deviation calculation for linear regression by jl2922 · Pull Request #8872 · scikit-learn/scikit-learn · GitHub
[go: up one dir, main page]

Skip to content

add standard deviation calculation for linear regression #8872

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions sklearn/linear_model/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,44 @@ def __init__(self, fit_intercept=True, normalize=False, copy_X=True,
self.copy_X = copy_X
self.n_jobs = n_jobs

def matrix_fit(self, X, y, n, p, dof, sample_weight):
    """Fit coefficients via the normal equations and estimate their
    standard deviations.

    Sets ``coef_`` and ``intercept_`` and, when residual degrees of
    freedom remain, ``stdev_`` — the standard deviation of each fitted
    coefficient (with the intercept's entry last when an intercept is
    fitted).

    Parameters
    ----------
    X : ndarray of shape (n, p)
        Dense training data.
    y : array-like of shape (n,) or (n, n_targets)
        Target values.
    n : int
        Number of samples.
    p : int
        Number of features (excluding the intercept column).
    dof : int
        Residual degrees of freedom, ``n - p``; one more is consumed
        here if an intercept is fitted.
    sample_weight : array-like of shape (n,) or None
        Per-sample weights, applied as sqrt-weights to rows of X and y.

    Returns
    -------
    self or None
        Returns ``self`` only on the singular-matrix fallback path;
        otherwise falls through (the caller ignores the return value).
    """
    # Work on a 2-D copy of y so single- and multi-target inputs are
    # handled uniformly.
    y = np.array(y, copy=True).reshape(n, -1)
    if self.fit_intercept:
        # Model the intercept as an extra all-ones column; it consumes
        # one residual degree of freedom.
        X = np.append(X, np.ones((n, 1)), axis=1)
        dof = dof - 1
    if sample_weight is not None:
        # Weighted least squares: scaling rows by sqrt(w) makes the
        # normal equations solve the weighted problem.
        sample_weight = np.sqrt(np.asarray(sample_weight, dtype=float))
        if sample_weight.size == n:
            sample_weight = sample_weight.reshape(n, 1)
            X = np.multiply(X, sample_weight)
            y = np.multiply(y, sample_weight)
        # else: a scalar weight rescales X and y uniformly, which
        # leaves both the coefficients and their standard deviations
        # unchanged, so it is deliberately ignored.  (This replaces a
        # bare `except: pass` that silently swallowed *every* error.)

    # Evaluate coefficients via the normal equations:
    #   beta = (X^T X)^{-1} X^T y
    XT = X.T
    try:
        XTX_inv = np.linalg.inv(np.dot(XT, X))
    except np.linalg.LinAlgError:
        # Singular design matrix: fall back to zero coefficients.
        # (Fixed: was `linalg.LinAlgError`, a name not used by the
        # `np.linalg.inv` call that raises the error.)
        self.coef_ = np.zeros(1)
        self.intercept_ = np.zeros(1)
        return self
    beta = np.dot(np.dot(XTX_inv, XT), y)
    intercept = beta[-1] if self.fit_intercept else np.zeros(
        y.shape[1])
    coef = beta[0:-1] if self.fit_intercept else beta
    residual = y - np.dot(X, beta)
    # Multi-target results are exposed as (n_targets, p); a single
    # target as a flat (p,) vector.
    self.coef_ = coef.T if coef.shape[1] > 1 else coef.reshape(p,)
    self.intercept_ = intercept.reshape(-1,)

    if dof > 0:
        # var(beta_j) = [(X^T X)^{-1}]_{jj} * RSS / dof, per target.
        diag = np.diagonal(XTX_inv)
        var_beta = np.outer(diag, np.sum(residual**2, axis=0)) / dof
        stdev = np.sqrt(var_beta).reshape(beta.shape)
        self.stdev_ = \
            stdev.T if coef.shape[1] > 1 else stdev.reshape(-1,)

def fit(self, X, y, sample_weight=None):
"""
Fit linear model.
Expand Down Expand Up @@ -480,6 +518,15 @@ def fit(self, X, y, sample_weight=None):
if sample_weight is not None and np.atleast_1d(sample_weight).ndim > 1:
raise ValueError("Sample weights must be 1D array or scalar")

if not sp.issparse(X) and not self.normalize:
X = np.array(X)
n = X.shape[0]
p = X.shape[1]
dof = n - p
if dof >= 0:
self.matrix_fit(X, y, n, p, dof, sample_weight)
return self

X, y, X_offset, y_offset, X_scale = self._preprocess_data(
X, y, fit_intercept=self.fit_intercept, normalize=self.normalize,
copy=self.copy_X, sample_weight=sample_weight)
Expand Down
33 changes: 33 additions & 0 deletions sklearn/linear_model/tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,39 @@ def test_linear_regression():
assert_array_almost_equal(reg.predict(X), [0])


def test_linear_regression_uncertainty():
    # Multivariate fit: check coefficients, intercept and their
    # standard deviations against reference values computed with R.
    features = [[5.0e-6, 2.5e-1],
                [2.0e-6, 4.0e-2],
                [1.0e-6, 1.0e-2],
                [5.0e-7, 2.5e-3],
                [2.0e-7, 4.0e-4],
                [1.0e-7, 1.0e-4]]
    targets = [-0.5930783034, -0.592875163, -0.5928283259,
               -0.5928091337, -0.5928019288, -0.5928014142]

    model = LinearRegression()
    model.fit(features, targets)

    assert_array_almost_equal(
        model.coef_, [-2.68764200535e+01, -5.90375272820e-04])
    assert_array_almost_equal(model.intercept_, [-5.92796478799e-01])
    assert_array_almost_equal(
        model.stdev_, [2.10596119117, 3.97159995702e-05, 1.48683754360e-06])

    # Weighted fit: again compared against reference values from R.
    w_features = [[6], [7], [8]]
    w_targets = [8000, 50000, 116000]
    weights = [123, 123, 246]
    model = LinearRegression()
    model.fit(w_features, w_targets, sample_weight=weights)

    assert_array_almost_equal(model.coef_, [55090.90909091])
    assert_array_almost_equal(model.intercept_, [-326909.09090909])
    assert_array_almost_equal(model.stdev_, [6171.11372672, 45032.21987029])

def test_linear_regression_sample_weights():
# TODO: loop over sparse data as well

Expand Down
0