@@ -51,7 +51,7 @@ def __init__(self, hidden_layer_sizes, activation, solver,
51
51
max_iter , loss , shuffle , random_state , tol , verbose ,
52
52
warm_start , momentum , nesterovs_momentum , early_stopping ,
53
53
validation_fraction , beta_1 , beta_2 , epsilon ,
54
- n_iter_no_change ):
54
+ n_iter_no_change , max_fun ):
55
55
self .activation = activation
56
56
self .solver = solver
57
57
self .alpha = alpha
@@ -75,6 +75,7 @@ def __init__(self, hidden_layer_sizes, activation, solver,
75
75
self .beta_2 = beta_2
76
76
self .epsilon = epsilon
77
77
self .n_iter_no_change = n_iter_no_change
78
+ self .max_fun = max_fun
78
79
79
80
def _unpack (self , packed_parameters ):
80
81
"""Extract the coefficients and intercepts from packed_parameters."""
@@ -172,7 +173,6 @@ def _loss_grad_lbfgs(self, packed_coef_inter, X, y, activations, deltas,
172
173
self ._unpack (packed_coef_inter )
173
174
loss , coef_grads , intercept_grads = self ._backprop (
174
175
X , y , activations , deltas , coef_grads , intercept_grads )
175
- self .n_iter_ += 1
176
176
grad = _pack (coef_grads , intercept_grads )
177
177
return loss , grad
178
178
@@ -381,6 +381,8 @@ def _validate_hyperparameters(self):
381
381
self .shuffle )
382
382
if self .max_iter <= 0 :
383
383
raise ValueError ("max_iter must be > 0, got %s." % self .max_iter )
384
+ if self .max_fun <= 0 :
385
+ raise ValueError ("max_fun must be > 0, got %s." % self .max_fun )
384
386
if self .alpha < 0.0 :
385
387
raise ValueError ("alpha must be >= 0, got %s." % self .alpha )
386
388
if (self .learning_rate in ["constant" , "invscaling" , "adaptive" ] and
@@ -459,10 +461,29 @@ def _fit_lbfgs(self, X, y, activations, deltas, coef_grads,
459
461
optimal_parameters , self .loss_ , d = fmin_l_bfgs_b (
460
462
x0 = packed_coef_inter ,
461
463
func = self ._loss_grad_lbfgs ,
462
- maxfun = self .max_iter ,
464
+ maxfun = self .max_fun ,
465
+ maxiter = self .max_iter ,
463
466
iprint = iprint ,
464
467
pgtol = self .tol ,
465
468
args = (X , y , activations , deltas , coef_grads , intercept_grads ))
469
+ self .n_iter_ = d ['nit' ]
470
+ if d ['warnflag' ] == 1 :
471
+ if d ['nit' ] >= self .max_iter :
472
+ warnings .warn (
473
+ "LBFGS Optimizer: Maximum iterations (%d) "
474
"reached and the optimization hasn't converged yet."
475
+ % self .max_iter , ConvergenceWarning )
476
+ if d ['funcalls' ] >= self .max_fun :
477
+ warnings .warn (
478
+ "LBFGS Optimizer: Maximum function evaluations (%d) "
479
+ "reached and the optimization hasn't converged yet."
480
+ % self .max_fun , ConvergenceWarning )
481
+ elif d ['warnflag' ] == 2 :
482
+ warnings .warn (
483
+ "LBFGS Optimizer: Optimization hasn't converged yet, "
484
+ "cause of LBFGS stopping: %s."
485
+ % d ['task' ], ConvergenceWarning )
486
+
466
487
467
488
self ._unpack (optimal_parameters )
468
489
@@ -833,6 +854,15 @@ class MLPClassifier(BaseMultilayerPerceptron, ClassifierMixin):
833
854
834
855
.. versionadded:: 0.20
835
856
857
+ max_fun : int, optional, default 15000
858
+ Only used when solver='lbfgs'. Maximum number of loss function calls.
859
+ The solver iterates until convergence (determined by 'tol'), number
860
+ of iterations reaches max_iter, or this number of loss function calls.
861
+ Note that number of loss function calls will be greater than or equal
862
+ to the number of iterations for the `MLPClassifier`.
863
+
864
+ .. versionadded:: 0.22
865
+
836
866
Attributes
837
867
----------
838
868
classes_ : array or list of array of shape (n_classes,)
@@ -898,8 +928,7 @@ def __init__(self, hidden_layer_sizes=(100,), activation="relu",
898
928
verbose = False , warm_start = False , momentum = 0.9 ,
899
929
nesterovs_momentum = True , early_stopping = False ,
900
930
validation_fraction = 0.1 , beta_1 = 0.9 , beta_2 = 0.999 ,
901
- epsilon = 1e-8 , n_iter_no_change = 10 ):
902
-
931
+ epsilon = 1e-8 , n_iter_no_change = 10 , max_fun = 15000 ):
903
932
super ().__init__ (
904
933
hidden_layer_sizes = hidden_layer_sizes ,
905
934
activation = activation , solver = solver , alpha = alpha ,
@@ -912,7 +941,7 @@ def __init__(self, hidden_layer_sizes=(100,), activation="relu",
912
941
early_stopping = early_stopping ,
913
942
validation_fraction = validation_fraction ,
914
943
beta_1 = beta_1 , beta_2 = beta_2 , epsilon = epsilon ,
915
- n_iter_no_change = n_iter_no_change )
944
+ n_iter_no_change = n_iter_no_change , max_fun = max_fun )
916
945
917
946
def _validate_input (self , X , y , incremental ):
918
947
X , y = check_X_y (X , y , accept_sparse = ['csr' , 'csc' , 'coo' ],
@@ -1216,6 +1245,15 @@ class MLPRegressor(BaseMultilayerPerceptron, RegressorMixin):
1216
1245
1217
1246
.. versionadded:: 0.20
1218
1247
1248
+ max_fun : int, optional, default 15000
1249
+ Only used when solver='lbfgs'. Maximum number of function calls.
1250
+ The solver iterates until convergence (determined by 'tol'), number
1251
+ of iterations reaches max_iter, or this number of function calls.
1252
+ Note that number of function calls will be greater than or equal to
1253
+ the number of iterations for the MLPRegressor.
1254
+
1255
+ .. versionadded:: 0.22
1256
+
1219
1257
Attributes
1220
1258
----------
1221
1259
loss_ : float
@@ -1279,8 +1317,7 @@ def __init__(self, hidden_layer_sizes=(100,), activation="relu",
1279
1317
verbose = False , warm_start = False , momentum = 0.9 ,
1280
1318
nesterovs_momentum = True , early_stopping = False ,
1281
1319
validation_fraction = 0.1 , beta_1 = 0.9 , beta_2 = 0.999 ,
1282
- epsilon = 1e-8 , n_iter_no_change = 10 ):
1283
-
1320
+ epsilon = 1e-8 , n_iter_no_change = 10 , max_fun = 15000 ):
1284
1321
super ().__init__ (
1285
1322
hidden_layer_sizes = hidden_layer_sizes ,
1286
1323
activation = activation , solver = solver , alpha = alpha ,
@@ -1293,7 +1330,7 @@ def __init__(self, hidden_layer_sizes=(100,), activation="relu",
1293
1330
early_stopping = early_stopping ,
1294
1331
validation_fraction = validation_fraction ,
1295
1332
beta_1 = beta_1 , beta_2 = beta_2 , epsilon = epsilon ,
1296
- n_iter_no_change = n_iter_no_change )
1333
+ n_iter_no_change = n_iter_no_change , max_fun = max_fun )
1297
1334
1298
1335
def predict (self , X ):
1299
1336
"""Predict using the multi-layer perceptron model.
0 commit comments