@@ -13,7 +13,6 @@

 from sklearn.utils._testing import assert_array_almost_equal
 from sklearn.utils._testing import assert_array_equal
-from sklearn.utils._testing import assert_almost_equal
 from sklearn.utils._testing import assert_allclose
 from sklearn.utils import check_random_state

@@ -26,6 +25,7 @@
 from sklearn.datasets import make_regression
 from sklearn.datasets import load_iris
 from sklearn.preprocessing import StandardScaler
+from sklearn.preprocessing import add_dummy_feature

 rng = np.random.RandomState(0)
 rtol = 1e-6
@@ -55,45 +55,42 @@ def test_linear_regression():
     assert_array_almost_equal(reg.predict(X), [0])


-def test_linear_regression_sample_weights():
-    # TODO: loop over sparse data as well
-
+@pytest.mark.parametrize("array_constr", [np.array, sparse.csr_matrix])
+@pytest.mark.parametrize("fit_intercept", [True, False])
+def test_linear_regression_sample_weights(array_constr, fit_intercept):
     rng = np.random.RandomState(0)

     # It would not work with under-determined systems
-    for n_samples, n_features in ((6, 5),):
+    n_samples, n_features = 6, 5

-        y = rng.randn(n_samples)
-        X = rng.randn(n_samples, n_features)
-        sample_weight = 1.0 + rng.rand(n_samples)
+    X = array_constr(rng.normal(size=(n_samples, n_features)))
+    y = rng.normal(size=n_samples)

-        for intercept in (True, False):
+    sample_weight = 1.0 + rng.uniform(size=n_samples)

-            # LinearRegression with explicit sample_weight
-            reg = LinearRegression(fit_intercept=intercept)
-            reg.fit(X, y, sample_weight=sample_weight)
-            coefs1 = reg.coef_
-            inter1 = reg.intercept_
+    # LinearRegression with explicit sample_weight
+    reg = LinearRegression(fit_intercept=fit_intercept)
+    reg.fit(X, y, sample_weight=sample_weight)
+    coefs1 = reg.coef_
+    inter1 = reg.intercept_

-            assert reg.coef_.shape == (X.shape[1],)  # sanity checks
-            assert reg.score(X, y) > 0.5
+    assert reg.coef_.shape == (X.shape[1],)  # sanity checks
+    assert reg.score(X, y) > 0.5

-            # Closed form of the weighted least square
-            # theta = (X^T W X)^(-1) * X^T W y
-            W = np.diag(sample_weight)
-            if intercept is False:
-                X_aug = X
-            else:
-                dummy_column = np.ones(shape=(n_samples, 1))
-                X_aug = np.concatenate((dummy_column, X), axis=1)
+    # Closed form of the weighted least square
+    # theta = (X^T W X)^(-1) @ X^T W y
+    W = np.diag(sample_weight)
+    X_aug = X if not fit_intercept else add_dummy_feature(X)

-            coefs2 = linalg.solve(X_aug.T.dot(W).dot(X_aug), X_aug.T.dot(W).dot(y))
+    Xw = X_aug.T @ W @ X_aug
+    yw = X_aug.T @ W @ y
+    coefs2 = linalg.solve(Xw, yw)

-            if intercept is False:
-                assert_array_almost_equal(coefs1, coefs2)
-            else:
-                assert_array_almost_equal(coefs1, coefs2[1:])
-                assert_almost_equal(inter1, coefs2[0])
+    if not fit_intercept:
+        assert_allclose(coefs1, coefs2)
+    else:
+        assert_allclose(coefs1, coefs2[1:])
+        assert_allclose(inter1, coefs2[0])


 def test_raises_value_error_if_positive_and_sparse():
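Stacking the two pytest.mark.parametrize decorators runs the cross product, so
the rewritten test covers four cases (dense/sparse x with/without intercept)
without the hand-rolled loops of the old version. The reference it compares
against is the closed-form weighted least-squares solution
theta = (X^T W X)^(-1) X^T W y. A minimal standalone sketch of that check,
dense-only, with names chosen here for illustration:

    import numpy as np
    from scipy import linalg
    from sklearn.linear_model import LinearRegression
    from sklearn.preprocessing import add_dummy_feature

    rng = np.random.RandomState(0)
    X = rng.normal(size=(6, 5))
    y = rng.normal(size=6)
    sample_weight = 1.0 + rng.uniform(size=6)

    reg = LinearRegression(fit_intercept=True)
    reg.fit(X, y, sample_weight=sample_weight)

    # Weighted normal equations; the leading ones column makes
    # theta[0] the intercept and theta[1:] the coefficients.
    X_aug = add_dummy_feature(X)
    W = np.diag(sample_weight)
    theta = linalg.solve(X_aug.T @ W @ X_aug, X_aug.T @ W @ y)

    np.testing.assert_allclose(reg.intercept_, theta[0], rtol=1e-6)
    np.testing.assert_allclose(reg.coef_, theta[1:], rtol=1e-6)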