FIX Adding max_fun parameter to MLP with lbfgs solver (#9274) · scikit-learn/scikit-learn@370b642 · GitHub

Commit 370b642

daniel-perry authored and rth committed
FIX Adding max_fun parameter to MLP with lbfgs solver (#9274)
Since MLP estimators don't provide the derivative of the objective function to `fmin_l_bfgs_b`, it needs to be manually evaluated, and so the number of function calls can be significantly larger than the number of iterations.
1 parent faa9406 commit 370b642
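
To see why function calls and iterations diverge, here is a minimal sketch (not part of the commit; the Rosenbrock objective is an arbitrary stand-in for the MLP loss) showing that `scipy.optimize.fmin_l_bfgs_b` reports more function calls (`funcalls`) than iterations (`nit`), because each line search may evaluate the objective several times:

    import numpy as np
    from scipy.optimize import fmin_l_bfgs_b

    def loss_grad(w):
        # Objective and analytic gradient returned together, the same
        # calling convention the MLP code uses for _loss_grad_lbfgs.
        f = 100.0 * (w[1] - w[0] ** 2) ** 2 + (1.0 - w[0]) ** 2
        g = np.array([
            -400.0 * w[0] * (w[1] - w[0] ** 2) - 2.0 * (1.0 - w[0]),
            200.0 * (w[1] - w[0] ** 2),
        ])
        return f, g

    _, _, d = fmin_l_bfgs_b(func=loss_grad, x0=np.zeros(2))
    # The line search costs extra evaluations on top of each iteration.
    print(d['nit'], d['funcalls'])  # funcalls >= nit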

File tree: 3 files changed, +112 −30 lines


doc/whats_new/v0.22.rst

Lines changed: 12 additions & 0 deletions
@@ -122,6 +122,18 @@ Changelog
     rather than variance in this case.
     :pr:`13704` by `Roddy MacSween <rlms>`.
 
+
+:mod:`sklearn.neural_network`
+.............................
+
+- |Feature| Add `max_fun` parameter in
+  :class:`neural_network.BaseMultilayerPerceptron`,
+  :class:`neural_network.MLPRegressor`, and
+  :class:`neural_network.MLPClassifier` to give control over the
+  maximum number of function evaluations when the ``tol`` improvement
+  is not met. :issue:`9274` by :user:`Daniel Perry <daniel-perry>`.
+
+
 Miscellaneous
 .............
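
As a quick sketch of the entry above (usage only; the default of 15000 comes from the docstrings further down this diff):

    from sklearn.neural_network import MLPClassifier, MLPRegressor

    # max_fun is only consulted by the lbfgs solver.
    clf = MLPClassifier(solver='lbfgs', max_fun=15000)
    reg = MLPRegressor(solver='lbfgs', max_fun=15000)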

sklearn/neural_network/multilayer_perceptron.py

Lines changed: 46 additions & 9 deletions
@@ -51,7 +51,7 @@ def __init__(self, hidden_layer_sizes, activation, solver,
                  max_iter, loss, shuffle, random_state, tol, verbose,
                  warm_start, momentum, nesterovs_momentum, early_stopping,
                  validation_fraction, beta_1, beta_2, epsilon,
-                 n_iter_no_change):
+                 n_iter_no_change, max_fun):
         self.activation = activation
         self.solver = solver
         self.alpha = alpha
@@ -75,6 +75,7 @@ def __init__(self, hidden_layer_sizes, activation, solver,
         self.beta_2 = beta_2
         self.epsilon = epsilon
         self.n_iter_no_change = n_iter_no_change
+        self.max_fun = max_fun
 
     def _unpack(self, packed_parameters):
         """Extract the coefficients and intercepts from packed_parameters."""
@@ -172,7 +173,6 @@ def _loss_grad_lbfgs(self, packed_coef_inter, X, y, activations, deltas,
         self._unpack(packed_coef_inter)
         loss, coef_grads, intercept_grads = self._backprop(
             X, y, activations, deltas, coef_grads, intercept_grads)
-        self.n_iter_ += 1
         grad = _pack(coef_grads, intercept_grads)
         return loss, grad

@@ -381,6 +381,8 @@ def _validate_hyperparameters(self):
                              self.shuffle)
         if self.max_iter <= 0:
             raise ValueError("max_iter must be > 0, got %s." % self.max_iter)
+        if self.max_fun <= 0:
+            raise ValueError("max_fun must be > 0, got %s." % self.max_fun)
         if self.alpha < 0.0:
             raise ValueError("alpha must be >= 0, got %s." % self.alpha)
         if (self.learning_rate in ["constant", "invscaling", "adaptive"] and
@@ -459,10 +461,29 @@ def _fit_lbfgs(self, X, y, activations, deltas, coef_grads,
         optimal_parameters, self.loss_, d = fmin_l_bfgs_b(
             x0=packed_coef_inter,
             func=self._loss_grad_lbfgs,
-            maxfun=self.max_iter,
+            maxfun=self.max_fun,
+            maxiter=self.max_iter,
             iprint=iprint,
             pgtol=self.tol,
             args=(X, y, activations, deltas, coef_grads, intercept_grads))
+        self.n_iter_ = d['nit']
+        if d['warnflag'] == 1:
+            if d['nit'] >= self.max_iter:
+                warnings.warn(
+                    "LBFGS Optimizer: Maximum iterations (%d) "
+                    "reached and the optimization hasn't converged yet."
+                    % self.max_iter, ConvergenceWarning)
+            if d['funcalls'] >= self.max_fun:
+                warnings.warn(
+                    "LBFGS Optimizer: Maximum function evaluations (%d) "
+                    "reached and the optimization hasn't converged yet."
+                    % self.max_fun, ConvergenceWarning)
+        elif d['warnflag'] == 2:
+            warnings.warn(
+                "LBFGS Optimizer: Optimization hasn't converged yet, "
+                "cause of LBFGS stopping: %s."
+                % d['task'], ConvergenceWarning)
+
 
         self._unpack(optimal_parameters)
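
A user-level sketch (assuming a build that includes this change; iris is an arbitrary choice) of how the new warnings surface when max_fun is set very low:

    import warnings
    from sklearn.datasets import load_iris
    from sklearn.exceptions import ConvergenceWarning
    from sklearn.neural_network import MLPClassifier

    X, y = load_iris(return_X_y=True)
    clf = MLPClassifier(solver='lbfgs', max_iter=150, max_fun=10,
                        random_state=1)
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        clf.fit(X, y)
    # Expect "Maximum function evaluations (10) reached ..." among these.
    print([str(w.message) for w in caught
           if issubclass(w.category, ConvergenceWarning)])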

@@ -833,6 +854,15 @@ class MLPClassifier(BaseMultilayerPerceptron, ClassifierMixin):
 
         .. versionadded:: 0.20
 
+    max_fun : int, optional, default 15000
+        Only used when solver='lbfgs'. Maximum number of loss function calls.
+        The solver iterates until convergence (determined by 'tol'), the
+        number of iterations reaches max_iter, or this number of loss
+        function calls is reached. Note that the number of loss function
+        calls will be greater than or equal to the number of iterations
+        for the `MLPClassifier`.
+
+        .. versionadded:: 0.22
+
     Attributes
     ----------
     classes_ : array or list of array of shape (n_classes,)
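
The note about function calls versus iterations can be checked directly; a hedged sketch (digits chosen arbitrarily, warnings silenced for brevity):

    import warnings
    from sklearn.datasets import load_digits
    from sklearn.neural_network import MLPClassifier

    X, y = load_digits(return_X_y=True)
    clf = MLPClassifier(solver='lbfgs', max_iter=150, max_fun=10,
                        random_state=1)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        clf.fit(X, y)
    # n_iter_ is taken from d['nit'], so it cannot exceed the cap on
    # loss function calls.
    assert clf.n_iter_ <= clf.max_fun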
@@ -898,8 +928,7 @@ def __init__(self, hidden_layer_sizes=(100,), activation="relu",
                  verbose=False, warm_start=False, momentum=0.9,
                  nesterovs_momentum=True, early_stopping=False,
                  validation_fraction=0.1, beta_1=0.9, beta_2=0.999,
-                 epsilon=1e-8, n_iter_no_change=10):
-
+                 epsilon=1e-8, n_iter_no_change=10, max_fun=15000):
         super().__init__(
             hidden_layer_sizes=hidden_layer_sizes,
             activation=activation, solver=solver, alpha=alpha,
@@ -912,7 +941,7 @@ def __init__(self, hidden_layer_sizes=(100,), activation="relu",
             early_stopping=early_stopping,
             validation_fraction=validation_fraction,
             beta_1=beta_1, beta_2=beta_2, epsilon=epsilon,
-            n_iter_no_change=n_iter_no_change)
+            n_iter_no_change=n_iter_no_change, max_fun=max_fun)
 
     def _validate_input(self, X, y, incremental):
         X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],
@@ -1216,6 +1245,15 @@ class MLPRegressor(BaseMultilayerPerceptron, RegressorMixin):
 
         .. versionadded:: 0.20
 
+    max_fun : int, optional, default 15000
+        Only used when solver='lbfgs'. Maximum number of function calls.
+        The solver iterates until convergence (determined by 'tol'), the
+        number of iterations reaches max_iter, or this number of function
+        calls is reached. Note that the number of function calls will be
+        greater than or equal to the number of iterations for the
+        MLPRegressor.
+
+        .. versionadded:: 0.22
+
     Attributes
     ----------
     loss_ : float
@@ -1279,8 +1317,7 @@ def __init__(self, hidden_layer_sizes=(100,), activation="relu",
                  verbose=False, warm_start=False, momentum=0.9,
                  nesterovs_momentum=True, early_stopping=False,
                  validation_fraction=0.1, beta_1=0.9, beta_2=0.999,
-                 epsilon=1e-8, n_iter_no_change=10):
-
+                 epsilon=1e-8, n_iter_no_change=10, max_fun=15000):
         super().__init__(
             hidden_layer_sizes=hidden_layer_sizes,
             activation=activation, solver=solver, alpha=alpha,
@@ -1293,7 +1330,7 @@ def __init__(self, hidden_layer_sizes=(100,), activation="relu",
             early_stopping=early_stopping,
             validation_fraction=validation_fraction,
             beta_1=beta_1, beta_2=beta_2, epsilon=epsilon,
-            n_iter_no_change=n_iter_no_change)
+            n_iter_no_change=n_iter_no_change, max_fun=max_fun)
 
     def predict(self, X):
         """Predict using the multi-layer perceptron model.

sklearn/neural_network/tests/test_mlp.py

Lines changed: 54 additions & 21 deletions
@@ -48,6 +48,8 @@
 Xboston = StandardScaler().fit_transform(boston.data)[: 200]
 yboston = boston.target[:200]
 
+regression_datasets = [(Xboston, yboston)]
+
 iris = load_iris()
 
 X_iris = iris.data
@@ -228,32 +230,30 @@ def loss_grad_fun(t):
     assert_almost_equal(numgrad, grad)
 
 
-def test_lbfgs_classification():
+@pytest.mark.parametrize('X,y', classification_datasets)
+def test_lbfgs_classification(X, y):
     # Test lbfgs on classification.
     # It should achieve a score higher than 0.95 for the binary and
     # multi-class versions of the digits dataset.
-    for X, y in classification_datasets:
-        X_train = X[:150]
-        y_train = y[:150]
-        X_test = X[150:]
-
-        expected_shape_dtype = (X_test.shape[0], y_train.dtype.kind)
-
-        for activation in ACTIVATION_TYPES:
-            mlp = MLPClassifier(solver='lbfgs', hidden_layer_sizes=50,
-                                max_iter=150, shuffle=True, random_state=1,
-                                activation=activation)
-            mlp.fit(X_train, y_train)
-            y_predict = mlp.predict(X_test)
-            assert mlp.score(X_train, y_train) > 0.95
-            assert ((y_predict.shape[0], y_predict.dtype.kind) ==
-                    expected_shape_dtype)
+    X_train = X[:150]
+    y_train = y[:150]
+    X_test = X[150:]
+    expected_shape_dtype = (X_test.shape[0], y_train.dtype.kind)
 
-
-def test_lbfgs_regression():
+    for activation in ACTIVATION_TYPES:
+        mlp = MLPClassifier(solver='lbfgs', hidden_layer_sizes=50,
+                            max_iter=150, shuffle=True, random_state=1,
+                            activation=activation)
+        mlp.fit(X_train, y_train)
+        y_predict = mlp.predict(X_test)
+        assert mlp.score(X_train, y_train) > 0.95
+        assert ((y_predict.shape[0], y_predict.dtype.kind) ==
+                expected_shape_dtype)
+
+
+@pytest.mark.parametrize('X,y', regression_datasets)
+def test_lbfgs_regression(X, y):
     # Test lbfgs on the boston dataset, a regression problem.
-    X = Xboston
-    y = yboston
     for activation in ACTIVATION_TYPES:
         mlp = MLPRegressor(solver='lbfgs', hidden_layer_sizes=50,
                            max_iter=150, shuffle=True, random_state=1,
@@ -266,6 +266,39 @@ def test_lbfgs_regression():
     assert mlp.score(X, y) > 0.95
 
 
+@pytest.mark.parametrize('X,y', classification_datasets)
+def test_lbfgs_classification_maxfun(X, y):
+    # Test lbfgs parameter max_fun.
+    # It should independently limit the number of iterations for lbfgs.
+    max_fun = 10
+    # classification tests
+    for activation in ACTIVATION_TYPES:
+        mlp = MLPClassifier(solver='lbfgs', hidden_layer_sizes=50,
+                            max_iter=150, max_fun=max_fun, shuffle=True,
+                            random_state=1, activation=activation)
+        with pytest.warns(ConvergenceWarning):
+            mlp.fit(X, y)
+            assert max_fun >= mlp.n_iter_
+
+
+@pytest.mark.parametrize('X,y', regression_datasets)
+def test_lbfgs_regression_maxfun(X, y):
+    # Test lbfgs parameter max_fun.
+    # It should independently limit the number of iterations for lbfgs.
+    max_fun = 10
+    # regression tests
+    for activation in ACTIVATION_TYPES:
+        mlp = MLPRegressor(solver='lbfgs', hidden_layer_sizes=50,
+                           max_iter=150, max_fun=max_fun, shuffle=True,
+                           random_state=1, activation=activation)
+        with pytest.warns(ConvergenceWarning):
+            mlp.fit(X, y)
+            assert max_fun >= mlp.n_iter_
+
+    mlp.max_fun = -1
+    assert_raises(ValueError, mlp.fit, X, y)
+
+
 def test_learning_rate_warmstart():
     # Tests that warm_start reuses past solutions.
     X = [[3, 2], [1, 6], [5, 6], [-2, -4]]
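
To run just the new tests, something like the following should work from a source checkout (the path is from the diff header; selecting by the 'maxfun' keyword is an assumption based on the test names above):

    import pytest

    # -k filters to test_lbfgs_classification_maxfun and
    # test_lbfgs_regression_maxfun.
    pytest.main(["sklearn/neural_network/tests/test_mlp.py",
                 "-k", "maxfun", "-q"])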
