@@ -134,7 +134,7 @@ def _loss_grad_lbfgs(self, packed_coef_inter, X, y, activations, deltas,
         with respect to the different parameters given in the initialization.
 
         Returned gradients are packed in a single vector so it can be used
-        in lbgfs
+        in lbfgs
 
         Parameters
         ----------
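For readers unfamiliar with the packing mentioned above: L-BFGS optimizes a single flat parameter vector, so all weight matrices and bias vectors are flattened and concatenated before each solver call. A minimal sketch of the idea (the helper name pack_params is illustrative, not necessarily the module's exact helper):

import numpy as np

def pack_params(coefs, intercepts):
    # Flatten every weight matrix and bias vector, then concatenate
    # them into the single 1-D array an L-BFGS routine expects.
    return np.hstack([a.ravel() for a in coefs + intercepts])

# Example: two layers with weight shapes (4, 3) and (3, 1)
coefs = [np.zeros((4, 3)), np.zeros((3, 1))]
intercepts = [np.zeros(3), np.zeros(1)]
print(pack_params(coefs, intercepts).shape)  # (19,) == 12 + 3 + 3 + 1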
@@ -345,8 +345,8 @@ def _fit(self, X, y, incremental=False):
             # First time training the model
             self._initialize(y, layer_units)
 
-        # lbgfs does not support mini-batches
-        if self.solver == 'lbgfs':
+        # lbfgs does not support mini-batches
+        if self.solver == 'lbfgs':
             batch_size = n_samples
         elif self.batch_size == 'auto':
             batch_size = min(200, n_samples)
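The comment in this hunk states the key constraint: L-BFGS is a full-batch optimizer, so the whole training set becomes a single batch and any user-supplied batch_size is ignored. A standalone sketch of the same selection logic (function and variable names chosen for illustration):

def resolve_batch_size(solver, batch_size, n_samples):
    # L-BFGS runs on the full training set in one batch,
    # regardless of the requested batch_size.
    if solver == 'lbfgs':
        return n_samples
    if batch_size == 'auto':
        return min(200, n_samples)
    return batch_size

print(resolve_batch_size('lbfgs', 'auto', 1000))  # 1000
print(resolve_batch_size('sgd', 'auto', 1000))    # 200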
@@ -375,7 +375,7 @@ def _fit(self, X, y, incremental=False):
                                  intercept_grads, layer_units, incremental)
 
         # Run the LBFGS solver
-        elif self.solver == 'lbgfs':
+        elif self.solver == 'lbfgs':
             self._fit_lbfgs(X, y, activations, deltas, coef_grads,
                             intercept_grads, layer_units)
         return self
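The _fit_lbfgs path hands the packed parameter vector and a loss-and-gradient callback to SciPy's L-BFGS-B routine. A self-contained sketch of that calling pattern, using a toy quadratic in place of the MLP loss:

import numpy as np
from scipy.optimize import fmin_l_bfgs_b

def loss_grad(packed):
    # Toy stand-in for _loss_grad_lbfgs: returns (loss, gradient)
    # for f(w) = 0.5 * ||w - 1||^2, whose gradient is (w - 1).
    residual = packed - 1.0
    return 0.5 * residual @ residual, residual

w0 = np.zeros(5)  # initial packed parameters
w_opt, final_loss, info = fmin_l_bfgs_b(loss_grad, w0)
print(np.round(w_opt, 6))  # ~[1. 1. 1. 1. 1.]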
@@ -422,7 +422,7 @@ def _validate_hyperparameters(self):
         if self.learning_rate not in ["constant", "invscaling", "adaptive"]:
             raise ValueError("learning rate %s is not supported. " %
                              self.learning_rate)
-        supported_solvers = _STOCHASTIC_SOLVERS + ["lbgfs"]
+        supported_solvers = _STOCHASTIC_SOLVERS + ["lbfgs"]
         if self.solver not in supported_solvers:
             raise ValueError("The solver %s is not supported. "
                              " Expected one of: %s" %
@@ -704,10 +704,10 @@ class MLPClassifier(BaseMultilayerPerceptron, ClassifierMixin):
704
704
- 'relu', the rectified linear unit function,
705
705
returns f(x) = max(0, x)
706
706
707
- solver : {'lbgfs ', 'sgd', 'adam'}, default 'adam'
707
+ solver : {'lbfgs ', 'sgd', 'adam'}, default 'adam'
708
708
The solver for weight optimization.
709
709
710
- - 'lbgfs ' is an optimizer in the family of quasi-Newton methods.
710
+ - 'lbfgs ' is an optimizer in the family of quasi-Newton methods.
711
711
712
712
- 'sgd' refers to stochastic gradient descent.
713
713
@@ -717,15 +717,15 @@ class MLPClassifier(BaseMultilayerPerceptron, ClassifierMixin):
         Note: The default solver 'adam' works pretty well on relatively
         large datasets (with thousands of training samples or more) in terms of
         both training time and validation score.
-        For small datasets, however, 'lbgfs' can converge faster and perform
+        For small datasets, however, 'lbfgs' can converge faster and perform
         better.
 
     alpha : float, optional, default 0.0001
         L2 penalty (regularization term) parameter.
 
     batch_size : int, optional, default 'auto'
         Size of minibatches for stochastic optimizers.
-        If the solver is 'lbgfs', the classifier will not use minibatch.
+        If the solver is 'lbfgs', the classifier will not use minibatch.
         When set to "auto", `batch_size=min(200, n_samples)`
 
     learning_rate : {'constant', 'invscaling', 'adaptive'}, default 'constant'
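To make the docstring's recommendation concrete, a small example using the corrected spelling on a tiny dataset (dataset choice and hyperparameters are illustrative):

from sklearn.datasets import load_iris
from sklearn.neural_network import MLPClassifier

X, y = load_iris(return_X_y=True)
# On a small dataset, the docstring suggests the full-batch
# quasi-Newton solver 'lbfgs' over the default 'adam'.
clf = MLPClassifier(solver='lbfgs', alpha=1e-4,
                    hidden_layer_sizes=(10,), random_state=0)
clf.fit(X, y)
print(clf.score(X, y))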
@@ -1046,10 +1046,10 @@ class MLPRegressor(BaseMultilayerPerceptron, RegressorMixin):
1046
1046
- 'relu', the rectified linear unit function,
1047
1047
returns f(x) = max(0, x)
1048
1048
1049
- solver : {'lbgfs ', 'sgd', 'adam'}, default 'adam'
1049
+ solver : {'lbfgs ', 'sgd', 'adam'}, default 'adam'
1050
1050
The solver for weight optimization.
1051
1051
1052
- - 'lbgfs ' is an optimizer in the family of quasi-Newton methods.
1052
+ - 'lbfgs ' is an optimizer in the family of quasi-Newton methods.
1053
1053
1054
1054
- 'sgd' refers to stochastic gradient descent.
1055
1055
@@ -1059,15 +1059,15 @@ class MLPRegressor(BaseMultilayerPerceptron, RegressorMixin):
         Note: The default solver 'adam' works pretty well on relatively
         large datasets (with thousands of training samples or more) in terms of
         both training time and validation score.
-        For small datasets, however, 'lbgfs' can converge faster and perform
+        For small datasets, however, 'lbfgs' can converge faster and perform
         better.
 
     alpha : float, optional, default 0.0001
         L2 penalty (regularization term) parameter.
 
     batch_size : int, optional, default 'auto'
         Size of minibatches for stochastic optimizers.
-        If the solver is 'lbgfs', the classifier will not use minibatch.
+        If the solver is 'lbfgs', the regressor will not use minibatch.
         When set to "auto", `batch_size=min(200, n_samples)`
 
     learning_rate : {'constant', 'invscaling', 'adaptive'}, default 'constant'
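The same advice applies to the regressor; a minimal sketch fitting a smooth 1-D target with the corrected solver name (data and hyperparameters are illustrative):

import numpy as np
from sklearn.neural_network import MLPRegressor

rng = np.random.RandomState(0)
X = rng.uniform(-1, 1, size=(100, 1))
y = np.sin(3 * X).ravel()
# 'lbfgs' runs full-batch here, so batch_size is ignored.
reg = MLPRegressor(solver='lbfgs', hidden_layer_sizes=(20,),
                   alpha=1e-4, random_state=0)
reg.fit(X, y)
print(reg.score(X, y))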