TST/FIX Add check for estimator: parameters not modified by `fit` (#7846) · Pthinker/scikit-learn@be305ce

Commit be305ce

kiote authored and jnothman committed
TST/FIX Add check for estimator: parameters not modified by fit (scikit-learn#7846)
Ensure that estimators only add private attributes and attributes with a trailing _. In cases where existing estimators don't follow this new rule, we deprecate the offending attributes and make them follow it.
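The same pattern recurs in every file below: an attribute that `fit` used to create or overwrite is renamed to end in a trailing underscore, and the old public name is kept as a read-only property that warns on access. A minimal sketch of the pattern, assuming the convention described above (the class and attribute names here are illustrative, not taken from the commit):

import numpy as np
from sklearn.utils import deprecated


class MyEstimator(object):
    """Hypothetical estimator illustrating the convention."""

    def __init__(self, alpha=1.0):
        # __init__ only stores constructor parameters, unmodified.
        self.alpha = alpha

    @property
    @deprecated("Attribute score_grid was deprecated in version 0.19 and "
                "will be removed in 0.21. Use 'score_grid_' instead")
    def score_grid(self):
        # Old public name survives as a read-only alias that warns on access.
        return self.score_grid_

    def fit(self, X, y=None):
        # Everything learned during fit carries a trailing underscore.
        self.score_grid_ = np.asarray(X).mean(axis=0)
        return self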
1 parent aaebee1 commit be305ce

11 files changed: +199 -42 lines changed

examples/covariance/plot_sparse_cov.py

Lines changed: 1 addition & 1 deletion
@@ -126,7 +126,7 @@
 # plot the model selection metric
 plt.figure(figsize=(4, 3))
 plt.axes([.2, .15, .75, .7])
-plt.plot(model.cv_alphas_, np.mean(model.grid_scores, axis=1), 'o-')
+plt.plot(model.cv_alphas_, np.mean(model.grid_scores_, axis=1), 'o-')
 plt.axvline(model.alpha_, color='.5')
 plt.title('Model selection')
 plt.ylabel('Cross-validation score')

sklearn/covariance/graph_lasso_.py

Lines changed: 9 additions & 2 deletions
@@ -19,6 +19,7 @@
 from ..exceptions import ConvergenceWarning
 from ..utils.extmath import pinvh
 from ..utils.validation import check_random_state, check_array
+from ..utils import deprecated
 from ..linear_model import lars_path
 from ..linear_model import cd_fast
 from ..model_selection import check_cv, cross_val_score
@@ -525,7 +526,7 @@ class GraphLassoCV(GraphLasso):
     cv_alphas_ : list of float
         All penalization parameters explored.
 
-    `grid_scores`: 2D numpy.ndarray (n_alphas, n_folds)
+    grid_scores_ : 2D numpy.ndarray (n_alphas, n_folds)
         Log-likelihood score on left-out data across folds.
 
     n_iter_ : int
@@ -564,6 +565,12 @@ def __init__(self, alphas=4, n_refinements=4, cv=None, tol=1e-4,
         # The base class needs this for the score method
         self.store_precision = True
 
+    @property
+    @deprecated("Attribute grid_scores was deprecated in version 0.19 and "
+                "will be removed in 0.21. Use 'grid_scores_' instead")
+    def grid_scores(self):
+        return self.grid_scores_
+
     def fit(self, X, y=None):
         """Fits the GraphLasso covariance model to X.
 
@@ -680,7 +687,7 @@ def fit(self, X, y=None):
             grid_scores.append(cross_val_score(EmpiricalCovariance(), X,
                                                cv=cv, n_jobs=self.n_jobs,
                                                verbose=inner_verbose))
-        self.grid_scores = np.array(grid_scores)
+        self.grid_scores_ = np.array(grid_scores)
         best_alpha = alphas[best_index]
         self.alpha_ = best_alpha
         self.cv_alphas_ = alphas
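In practice the old spelling still works on a fitted GraphLassoCV but raises a DeprecationWarning on access. A quick usage sketch against the 0.19-era API (the toy data is invented for illustration):

import warnings

import numpy as np
from sklearn.covariance import GraphLassoCV

X = np.random.RandomState(0).randn(60, 5)  # toy data for illustration
model = GraphLassoCV(alphas=[0.8, 0.5], tol=1e-1).fit(X)

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    scores = model.grid_scores  # old name: warns, forwards to grid_scores_
assert any(issubclass(w.category, DeprecationWarning) for w in caught)
assert np.array_equal(scores, model.grid_scores_)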

sklearn/covariance/tests/test_graph_lasso.py

Lines changed: 23 additions & 0 deletions
@@ -7,6 +7,7 @@
 
 from sklearn.utils.testing import assert_array_almost_equal
 from sklearn.utils.testing import assert_array_less
+from sklearn.utils.testing import assert_warns_message
 
 from sklearn.covariance import (graph_lasso, GraphLasso, GraphLassoCV,
                                 empirical_covariance)
@@ -15,6 +16,8 @@
 from sklearn.utils import check_random_state
 from sklearn import datasets
 
+from numpy.testing import assert_equal
+
 
 def test_graph_lasso(random_state=0):
     # Sample data from a sparse multivariate normal
@@ -131,3 +134,23 @@ def test_graph_lasso_cv(random_state=1):
 
     # Smoke test with specified alphas
     GraphLassoCV(alphas=[0.8, 0.5], tol=1e-1, n_jobs=1).fit(X)
+
+
+def test_deprecated_grid_scores(random_state=1):
+    dim = 5
+    n_samples = 6
+    random_state = check_random_state(random_state)
+    prec = make_sparse_spd_matrix(dim, alpha=.96,
+                                  random_state=random_state)
+    cov = linalg.inv(prec)
+    X = random_state.multivariate_normal(np.zeros(dim), cov, size=n_samples)
+    graph_lasso = GraphLassoCV(alphas=[0.8, 0.5], tol=1e-1, n_jobs=1)
+    graph_lasso.fit(X)
+
+    depr_message = ("Attribute grid_scores was deprecated in version "
+                    "0.19 and will be removed in 0.21. Use "
+                    "'grid_scores_' instead")
+
+    assert_warns_message(DeprecationWarning, depr_message,
+                         lambda: graph_lasso.grid_scores)
+    assert_equal(graph_lasso.grid_scores, graph_lasso.grid_scores_)
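For reference, the check that assert_warns_message performs can be approximated with the standard library alone. A rough stand-in, not the actual sklearn.utils.testing helper:

import warnings


def check_warns_message(warning_cls, message, func):
    # Rough stand-in for sklearn.utils.testing.assert_warns_message:
    # call func, require a warning of warning_cls whose text contains
    # message, and return func's result.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        result = func()
    matches = [w for w in caught
               if issubclass(w.category, warning_cls)
               and message in str(w.message)]
    assert matches, "expected %s matching %r" % (warning_cls.__name__,
                                                 message)
    return result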

sklearn/ensemble/gradient_boosting.py

Lines changed: 18 additions & 10 deletions
@@ -57,6 +57,7 @@
 from ..utils.extmath import logsumexp
 from ..utils.fixes import expit
 from ..utils.fixes import bincount
+from ..utils import deprecated
 from ..utils.stats import _weighted_percentile
 from ..utils.validation import check_is_fitted
 from ..utils.multiclass import check_classification_targets
@@ -846,25 +847,26 @@ def _check_params(self):
             if self.max_features == "auto":
                 # if is_classification
                 if self.n_classes_ > 1:
-                    max_features = max(1, int(np.sqrt(self.n_features)))
+                    max_features = max(1, int(np.sqrt(self.n_features_)))
                 else:
                     # is regression
-                    max_features = self.n_features
+                    max_features = self.n_features_
             elif self.max_features == "sqrt":
-                max_features = max(1, int(np.sqrt(self.n_features)))
+                max_features = max(1, int(np.sqrt(self.n_features_)))
             elif self.max_features == "log2":
-                max_features = max(1, int(np.log2(self.n_features)))
+                max_features = max(1, int(np.log2(self.n_features_)))
             else:
                 raise ValueError("Invalid value for max_features: %r. "
                                  "Allowed string values are 'auto', 'sqrt' "
                                  "or 'log2'." % self.max_features)
         elif self.max_features is None:
-            max_features = self.n_features
+            max_features = self.n_features_
         elif isinstance(self.max_features, (numbers.Integral, np.integer)):
            max_features = self.max_features
         else:  # float
             if 0. < self.max_features <= 1.:
-                max_features = max(int(self.max_features * self.n_features), 1)
+                max_features = max(int(self.max_features *
+                                       self.n_features_), 1)
             else:
                 raise ValueError("max_features must be in (0, n_features]")
 
@@ -924,6 +926,12 @@ def _check_initialized(self):
         """Check that the estimator is initialized, raising an error if not."""
         check_is_fitted(self, 'estimators_')
 
+    @property
+    @deprecated("Attribute n_features was deprecated in version 0.19 and "
+                "will be removed in 0.21.")
+    def n_features(self):
+        return self.n_features_
+
     def fit(self, X, y, sample_weight=None, monitor=None):
         """Fit the gradient boosting model.
 
@@ -965,7 +973,7 @@ def fit(self, X, y, sample_weight=None, monitor=None):
 
         # Check input
         X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'], dtype=DTYPE)
-        n_samples, self.n_features = X.shape
+        n_samples, self.n_features_ = X.shape
         if sample_weight is None:
             sample_weight = np.ones(n_samples, dtype=np.float32)
         else:
@@ -1106,9 +1114,9 @@ def _init_decision_function(self, X):
         """Check input and compute prediction of ``init``. """
         self._check_initialized()
         X = self.estimators_[0, 0]._validate_X_predict(X, check_input=True)
-        if X.shape[1] != self.n_features:
+        if X.shape[1] != self.n_features_:
             raise ValueError("X.shape[1] should be {0:d}, not {1:d}.".format(
-                self.n_features, X.shape[1]))
+                self.n_features_, X.shape[1]))
         score = self.init_.predict(X).astype(np.float64)
         return score
 
@@ -1158,7 +1166,7 @@ def feature_importances_(self):
         """
         self._check_initialized()
 
-        total_sum = np.zeros((self.n_features, ), dtype=np.float64)
+        total_sum = np.zeros((self.n_features_, ), dtype=np.float64)
         for stage in self.estimators_:
             stage_sum = sum(tree.feature_importances_
                             for tree in stage) / len(stage)
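As a usage sketch (toy data invented for illustration): after fit, the learned input width lives in n_features_, while the old name keeps working but warns under the 0.19-era API:

import numpy as np
from sklearn.ensemble import GradientBoostingClassifier

rng = np.random.RandomState(0)
X = rng.randn(40, 3)
y = rng.randint(0, 2, 40)
clf = GradientBoostingClassifier(n_estimators=5).fit(X, y)

print(clf.n_features_)  # 3, set by fit
print(clf.n_features)   # same value, but emits a DeprecationWarning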

sklearn/ensemble/partial_dependence.py

Lines changed: 5 additions & 5 deletions
@@ -129,9 +129,9 @@ def partial_dependence(gbrt, target_variables, grid=None, X=None,
     target_variables = np.asarray(target_variables, dtype=np.int32,
                                   order='C').ravel()
 
-    if any([not (0 <= fx < gbrt.n_features) for fx in target_variables]):
+    if any([not (0 <= fx < gbrt.n_features_) for fx in target_variables]):
         raise ValueError('target_variables must be in [0, %d]'
-                         % (gbrt.n_features - 1))
+                         % (gbrt.n_features_ - 1))
 
     if X is not None:
         X = check_array(X, dtype=DTYPE, order='C')
@@ -258,8 +258,8 @@ def plot_partial_dependence(gbrt, X, features, feature_names=None,
     label_idx = 0
 
     X = check_array(X, dtype=DTYPE, order='C')
-    if gbrt.n_features != X.shape[1]:
-        raise ValueError('X.shape[1] does not match gbrt.n_features')
+    if gbrt.n_features_ != X.shape[1]:
+        raise ValueError('X.shape[1] does not match gbrt.n_features_')
 
     if line_kw is None:
         line_kw = {'color': 'green'}
@@ -269,7 +269,7 @@ def plot_partial_dependence(gbrt, X, features, feature_names=None,
     # convert feature_names to list
     if feature_names is None:
         # if not feature_names use fx indices as name
-        feature_names = [str(i) for i in range(gbrt.n_features)]
+        feature_names = [str(i) for i in range(gbrt.n_features_)]
     elif isinstance(feature_names, np.ndarray):
         feature_names = feature_names.tolist()
 

sklearn/gaussian_process/gpr.py

Lines changed: 20 additions & 7 deletions
@@ -15,6 +15,7 @@
 from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
 from sklearn.utils import check_random_state
 from sklearn.utils.validation import check_X_y, check_array
+from sklearn.utils.deprecation import deprecated
 
 
 class GaussianProcessRegressor(BaseEstimator, RegressorMixin):
@@ -140,8 +141,20 @@ def __init__(self, kernel=None, alpha=1e-10,
         self.copy_X_train = copy_X_train
         self.random_state = random_state
 
+    @property
+    @deprecated("Attribute rng was deprecated in version 0.19 and "
+                "will be removed in 0.21.")
+    def rng(self):
+        return self._rng
+
+    @property
+    @deprecated("Attribute y_train_mean was deprecated in version 0.19 and "
+                "will be removed in 0.21.")
+    def y_train_mean(self):
+        return self._y_train_mean
+
     def fit(self, X, y):
-        """Fit Gaussian process regression model
+        """Fit Gaussian process regression model.
 
         Parameters
         ----------
@@ -161,17 +174,17 @@ def fit(self, X, y):
         else:
             self.kernel_ = clone(self.kernel)
 
-        self.rng = check_random_state(self.random_state)
+        self._rng = check_random_state(self.random_state)
 
         X, y = check_X_y(X, y, multi_output=True, y_numeric=True)
 
         # Normalize target value
         if self.normalize_y:
-            self.y_train_mean = np.mean(y, axis=0)
+            self._y_train_mean = np.mean(y, axis=0)
             # demean y
-            y = y - self.y_train_mean
+            y = y - self._y_train_mean
         else:
-            self.y_train_mean = np.zeros(1)
+            self._y_train_mean = np.zeros(1)
 
         if np.iterable(self.alpha) \
            and self.alpha.shape[0] != y.shape[0]:
@@ -211,7 +224,7 @@ def obj_func(theta, eval_gradient=True):
         bounds = self.kernel_.bounds
         for iteration in range(self.n_restarts_optimizer):
             theta_initial = \
-                self.rng.uniform(bounds[:, 0], bounds[:, 1])
+                self._rng.uniform(bounds[:, 0], bounds[:, 1])
             optima.append(
                 self._constrained_optimization(obj_func, theta_initial,
                                                bounds))
@@ -287,7 +300,7 @@ def predict(self, X, return_std=False, return_cov=False):
         else:  # Predict based on GP posterior
             K_trans = self.kernel_(X, self.X_train_)
             y_mean = K_trans.dot(self.alpha_)  # Line 4 (y_mean = f_star)
-            y_mean = self.y_train_mean + y_mean  # undo normal.
+            y_mean = self._y_train_mean + y_mean  # undo normal.
             if return_cov:
                 v = cho_solve((self.L_, True), K_trans.T)  # Line 5
                 y_cov = self.kernel_(X) - K_trans.dot(v)  # Line 6
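Here the fitted state moves to private names rather than trailing-underscore ones, since the RNG and the normalization mean are implementation details, not part of the public fitted API. A brief usage sketch (toy data invented for illustration):

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor

rng = np.random.RandomState(0)
X = rng.randn(20, 2)
y = rng.randn(20)
gpr = GaussianProcessRegressor(normalize_y=True).fit(X, y)

print(gpr._y_train_mean)  # new private attribute set by fit
print(gpr.y_train_mean)   # deprecated alias, emits a DeprecationWarning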

sklearn/linear_model/least_angle.py

Lines changed: 10 additions & 11 deletions
@@ -587,14 +587,15 @@ class Lars(LinearModel, RegressorMixin):
     sklearn.decomposition.sparse_encode
 
     """
+    method = 'lar'
+
     def __init__(self, fit_intercept=True, verbose=False, normalize=True,
                  precompute='auto', n_nonzero_coefs=500,
                  eps=np.finfo(np.float).eps, copy_X=True, fit_path=True,
                  positive=False):
         self.fit_intercept = fit_intercept
         self.verbose = verbose
         self.normalize = normalize
-        self.method = 'lar'
         self.precompute = precompute
         self.n_nonzero_coefs = n_nonzero_coefs
         self.positive = positive
@@ -827,6 +828,7 @@ class LassoLars(Lars):
     sklearn.decomposition.sparse_encode
 
     """
+    method = 'lasso'
 
     def __init__(self, alpha=1.0, fit_intercept=True, verbose=False,
                  normalize=True, precompute='auto', max_iter=500,
@@ -837,7 +839,6 @@ def __init__(self, alpha=1.0, fit_intercept=True, verbose=False,
         self.max_iter = max_iter
         self.verbose = verbose
         self.normalize = normalize
-        self.method = 'lasso'
         self.positive = positive
         self.precompute = precompute
         self.copy_X = copy_X
@@ -1075,17 +1076,16 @@ def __init__(self, fit_intercept=True, verbose=False, max_iter=500,
                  normalize=True, precompute='auto', cv=None,
                  max_n_alphas=1000, n_jobs=1, eps=np.finfo(np.float).eps,
                  copy_X=True, positive=False):
-        self.fit_intercept = fit_intercept
-        self.positive = positive
         self.max_iter = max_iter
-        self.verbose = verbose
-        self.normalize = normalize
-        self.precompute = precompute
-        self.copy_X = copy_X
         self.cv = cv
         self.max_n_alphas = max_n_alphas
         self.n_jobs = n_jobs
-        self.eps = eps
+        super(LarsCV, self).__init__(fit_intercept=fit_intercept,
+                                     verbose=verbose, normalize=normalize,
+                                     precompute=precompute,
+                                     n_nonzero_coefs=500,
+                                     eps=eps, copy_X=copy_X, fit_path=True,
+                                     positive=positive)
 
     def fit(self, X, y):
         """Fit the model using X, y as training data.
@@ -1103,7 +1103,6 @@ def fit(self, X, y):
         self : object
             returns an instance of self.
         """
-        self.fit_path = True
         X, y = check_X_y(X, y, y_numeric=True)
         X = as_float_array(X, copy=self.copy_X)
         y = as_float_array(y, copy=self.copy_X)
@@ -1428,6 +1427,7 @@ def __init__(self, criterion='aic', fit_intercept=True, verbose=False,
         self.copy_X = copy_X
         self.precompute = precompute
         self.eps = eps
+        self.fit_path = True
 
     def fit(self, X, y, copy_X=True):
         """Fit the model using X, y as training data.
@@ -1448,7 +1448,6 @@ def fit(self, X, y, copy_X=True):
         self : object
             returns an instance of self.
         """
-        self.fit_path = True
         X, y = check_X_y(X, y, y_numeric=True)
 
         X, y, Xmean, ymean, Xstd = LinearModel._preprocess_data(
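These moves all serve the new check: fit must not create or modify public attributes lacking a trailing underscore, so constants like method become class attributes, and fit_path is now set in __init__ (directly, or through the parent constructor) instead of in fit. An illustrative sketch of the class-attribute idiom, using hypothetical classes rather than the scikit-learn ones:

class BasePath(object):
    """Hypothetical base class, standing in for Lars."""

    method = None  # subclasses pin this at class level

    def __init__(self, alpha=1.0):
        self.alpha = alpha  # constructor parameters only


class LarPath(BasePath):
    method = 'lar'  # a constant, so not set per-instance in __init__


class LassoPath(BasePath):
    method = 'lasso'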

0 commit comments
