From 431e8796a2a26c79b716f43a7c153535a4b541a8 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Tue, 27 Sep 2016 13:20:24 -0400 Subject: [PATCH] fix lbfgs rename --- doc/modules/neural_networks_supervised.rst | 8 +++--- .../neural_network/multilayer_perceptron.py | 26 +++++++++---------- sklearn/neural_network/tests/test_mlp.py | 18 ++++++------- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/doc/modules/neural_networks_supervised.rst b/doc/modules/neural_networks_supervised.rst index f94ecd72d4a43..43cadeb997ec2 100644 --- a/doc/modules/neural_networks_supervised.rst +++ b/doc/modules/neural_networks_supervised.rst @@ -86,7 +86,7 @@ training samples:: >>> from sklearn.neural_network import MLPClassifier >>> X = [[0., 0.], [1., 1.]] >>> y = [0, 1] - >>> clf = MLPClassifier(solver='lbgfs', alpha=1e-5, + >>> clf = MLPClassifier(solver='lbfgs', alpha=1e-5, ... hidden_layer_sizes=(5, 2), random_state=1) ... >>> clf.fit(X, y) # doctest: +NORMALIZE_WHITESPACE @@ -95,7 +95,7 @@ training samples:: epsilon=1e-08, hidden_layer_sizes=(5, 2), learning_rate='constant', learning_rate_init=0.001, max_iter=200, momentum=0.9, nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True, - solver='lbgfs', tol=0.0001, validation_fraction=0.1, verbose=False, + solver='lbfgs', tol=0.0001, validation_fraction=0.1, verbose=False, warm_start=False) After fitting (training), the model can predict labels for new samples:: @@ -134,7 +134,7 @@ indices where the value is `1` represents the assigned classes of that sample:: >>> X = [[0., 0.], [1., 1.]] >>> y = [[0, 1], [1, 1]] - >>> clf = MLPClassifier(solver='lbgfs', alpha=1e-5, + >>> clf = MLPClassifier(solver='lbfgs', alpha=1e-5, ... hidden_layer_sizes=(15,), random_state=1) ... >>> clf.fit(X, y) # doctest: +NORMALIZE_WHITESPACE @@ -143,7 +143,7 @@ indices where the value is `1` represents the assigned classes of that sample:: epsilon=1e-08, hidden_layer_sizes=(15,), learning_rate='constant', learning_rate_init=0.001, max_iter=200, momentum=0.9, nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True, - solver='lbgfs', tol=0.0001, validation_fraction=0.1, verbose=False, + solver='lbfgs', tol=0.0001, validation_fraction=0.1, verbose=False, warm_start=False) >>> clf.predict([1., 2.]) array([[1, 1]]) diff --git a/sklearn/neural_network/multilayer_perceptron.py b/sklearn/neural_network/multilayer_perceptron.py index 87ea951533eb5..9f7117eb1b118 100644 --- a/sklearn/neural_network/multilayer_perceptron.py +++ b/sklearn/neural_network/multilayer_perceptron.py @@ -134,7 +134,7 @@ def _loss_grad_lbfgs(self, packed_coef_inter, X, y, activations, deltas, with respect to the different parameters given in the initialization. 
Returned gradients are packed in a single vector so it can be used - in lbgfs + in lbfgs Parameters ---------- @@ -345,8 +345,8 @@ def _fit(self, X, y, incremental=False): # First time training the model self._initialize(y, layer_units) - # lbgfs does not support mini-batches - if self.solver == 'lbgfs': + # lbfgs does not support mini-batches + if self.solver == 'lbfgs': batch_size = n_samples elif self.batch_size == 'auto': batch_size = min(200, n_samples) @@ -375,7 +375,7 @@ def _fit(self, X, y, incremental=False): intercept_grads, layer_units, incremental) # Run the LBFGS solver - elif self.solver == 'lbgfs': + elif self.solver == 'lbfgs': self._fit_lbfgs(X, y, activations, deltas, coef_grads, intercept_grads, layer_units) return self @@ -422,7 +422,7 @@ def _validate_hyperparameters(self): if self.learning_rate not in ["constant", "invscaling", "adaptive"]: raise ValueError("learning rate %s is not supported. " % self.learning_rate) - supported_solvers = _STOCHASTIC_SOLVERS + ["lbgfs"] + supported_solvers = _STOCHASTIC_SOLVERS + ["lbfgs"] if self.solver not in supported_solvers: raise ValueError("The solver %s is not supported. " " Expected one of: %s" % @@ -704,10 +704,10 @@ class MLPClassifier(BaseMultilayerPerceptron, ClassifierMixin): - 'relu', the rectified linear unit function, returns f(x) = max(0, x) - solver : {'lbgfs', 'sgd', 'adam'}, default 'adam' + solver : {'lbfgs', 'sgd', 'adam'}, default 'adam' The solver for weight optimization. - - 'lbgfs' is an optimizer in the family of quasi-Newton methods. + - 'lbfgs' is an optimizer in the family of quasi-Newton methods. - 'sgd' refers to stochastic gradient descent. @@ -717,7 +717,7 @@ class MLPClassifier(BaseMultilayerPerceptron, ClassifierMixin): Note: The default solver 'adam' works pretty well on relatively large datasets (with thousands of training samples or more) in terms of both training time and validation score. - For small datasets, however, 'lbgfs' can converge faster and perform + For small datasets, however, 'lbfgs' can converge faster and perform better. alpha : float, optional, default 0.0001 @@ -725,7 +725,7 @@ class MLPClassifier(BaseMultilayerPerceptron, ClassifierMixin): batch_size : int, optional, default 'auto' Size of minibatches for stochastic optimizers. - If the solver is 'lbgfs', the classifier will not use minibatch. + If the solver is 'lbfgs', the classifier will not use minibatch. When set to "auto", `batch_size=min(200, n_samples)` learning_rate : {'constant', 'invscaling', 'adaptive'}, default 'constant' @@ -1046,10 +1046,10 @@ class MLPRegressor(BaseMultilayerPerceptron, RegressorMixin): - 'relu', the rectified linear unit function, returns f(x) = max(0, x) - solver : {'lbgfs', 'sgd', 'adam'}, default 'adam' + solver : {'lbfgs', 'sgd', 'adam'}, default 'adam' The solver for weight optimization. - - 'lbgfs' is an optimizer in the family of quasi-Newton methods. + - 'lbfgs' is an optimizer in the family of quasi-Newton methods. - 'sgd' refers to stochastic gradient descent. @@ -1059,7 +1059,7 @@ class MLPRegressor(BaseMultilayerPerceptron, RegressorMixin): Note: The default solver 'adam' works pretty well on relatively large datasets (with thousands of training samples or more) in terms of both training time and validation score. - For small datasets, however, 'lbgfs' can converge faster and perform + For small datasets, however, 'lbfgs' can converge faster and perform better. 
alpha : float, optional, default 0.0001 @@ -1067,7 +1067,7 @@ class MLPRegressor(BaseMultilayerPerceptron, RegressorMixin): batch_size : int, optional, default 'auto' Size of minibatches for stochastic optimizers. - If the solver is 'lbgfs', the classifier will not use minibatch. + If the solver is 'lbfgs', the classifier will not use minibatch. When set to "auto", `batch_size=min(200, n_samples)` learning_rate : {'constant', 'invscaling', 'adaptive'}, default 'constant' diff --git a/sklearn/neural_network/tests/test_mlp.py b/sklearn/neural_network/tests/test_mlp.py index b8552246ceef6..e54a02d31cc05 100644 --- a/sklearn/neural_network/tests/test_mlp.py +++ b/sklearn/neural_network/tests/test_mlp.py @@ -176,7 +176,7 @@ def test_gradient(): for activation in ACTIVATION_TYPES: mlp = MLPClassifier(activation=activation, hidden_layer_sizes=10, - solver='lbgfs', alpha=1e-5, + solver='lbfgs', alpha=1e-5, learning_rate_init=0.2, max_iter=1, random_state=1) mlp.fit(X, y) @@ -235,7 +235,7 @@ def test_lbfgs_classification(): expected_shape_dtype = (X_test.shape[0], y_train.dtype.kind) for activation in ACTIVATION_TYPES: - mlp = MLPClassifier(solver='lbgfs', hidden_layer_sizes=50, + mlp = MLPClassifier(solver='lbfgs', hidden_layer_sizes=50, max_iter=150, shuffle=True, random_state=1, activation=activation) mlp.fit(X_train, y_train) @@ -250,7 +250,7 @@ def test_lbfgs_regression(): X = Xboston y = yboston for activation in ACTIVATION_TYPES: - mlp = MLPRegressor(solver='lbgfs', hidden_layer_sizes=50, + mlp = MLPRegressor(solver='lbfgs', hidden_layer_sizes=50, max_iter=150, shuffle=True, random_state=1, activation=activation) mlp.fit(X, y) @@ -287,7 +287,7 @@ def test_multilabel_classification(): # test fit method X, y = make_multilabel_classification(n_samples=50, random_state=0, return_indicator=True) - mlp = MLPClassifier(solver='lbgfs', hidden_layer_sizes=50, alpha=1e-5, + mlp = MLPClassifier(solver='lbfgs', hidden_layer_sizes=50, alpha=1e-5, max_iter=150, random_state=0, activation='logistic', learning_rate_init=0.2) mlp.fit(X, y) @@ -305,7 +305,7 @@ def test_multilabel_classification(): def test_multioutput_regression(): # Test that multi-output regression works as expected X, y = make_regression(n_samples=200, n_targets=5) - mlp = MLPRegressor(solver='lbgfs', hidden_layer_sizes=50, max_iter=200, + mlp = MLPRegressor(solver='lbfgs', hidden_layer_sizes=50, max_iter=200, random_state=1) mlp.fit(X, y) assert_greater(mlp.score(X, y), 0.9) @@ -388,8 +388,8 @@ def test_partial_fit_errors(): assert_raises(ValueError, MLPClassifier(solver='sgd').partial_fit, X, y, classes=[2]) - # lbgfs doesn't support partial_fit - assert_false(hasattr(MLPClassifier(solver='lbgfs'), 'partial_fit')) + # lbfgs doesn't support partial_fit + assert_false(hasattr(MLPClassifier(solver='lbfgs'), 'partial_fit')) def test_params_errors(): @@ -471,7 +471,7 @@ def test_predict_proba_multilabel(): return_indicator=True) n_samples, n_classes = Y.shape - clf = MLPClassifier(solver='lbgfs', hidden_layer_sizes=30, + clf = MLPClassifier(solver='lbfgs', hidden_layer_sizes=30, random_state=0) clf.fit(X, Y) y_proba = clf.predict_proba(X) @@ -493,7 +493,7 @@ def test_sparse_matrices(): X = X_digits_binary[:50] y = y_digits_binary[:50] X_sparse = csr_matrix(X) - mlp = MLPClassifier(solver='lbgfs', hidden_layer_sizes=15, + mlp = MLPClassifier(solver='lbfgs', hidden_layer_sizes=15, random_state=1) mlp.fit(X, y) pred1 = mlp.predict(X)
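
A quick end-to-end check of the rename (a minimal sketch): the snippet below mirrors the doctest from doc/modules/neural_networks_supervised.rst and assumes a scikit-learn tree with this patch applied. On a tree without the patch, _validate_hyperparameters only accepts the misspelled 'lbgfs', so passing the corrected name would raise a ValueError.

    from sklearn.neural_network import MLPClassifier

    X = [[0., 0.], [1., 1.]]
    y = [0, 1]

    # 'lbfgs' runs full-batch (batch_size is ignored for this solver),
    # which suits a tiny dataset like this one.
    clf = MLPClassifier(solver='lbfgs', alpha=1e-5,
                        hidden_layer_sizes=(5, 2), random_state=1)
    clf.fit(X, y)
    print(clf.predict([[2., 2.], [-1., -2.]]))  # expected: [1 0]

The corrected spelling is also exercised by the updated tests above (test_lbfgs_classification, test_lbfgs_regression, test_gradient), so the rename is covered by both the doctests and the unit tests.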