FIX Fixes bug in mlp when loading a pickle and partial_fit (#19631) · rth/scikit-learn@1d1f65a · GitHub
Commit 1d1f65a

FIX Fixes bug in mlp when loading a pickle and partial_fit (scikit-learn#19631)
Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
1 parent 44c40b3 commit 1d1f65a

File tree

5 files changed: +56 -26 lines changed

doc/whats_new/v1.0.rst

Lines changed: 7 additions & 1 deletion

@@ -555,7 +555,6 @@ Changelog
   Use ``var_`` instead.
   :pr:`18842` by :user:`Hong Shao Yang <hongshaoyang>`.
 
-
 :mod:`sklearn.neighbors`
 ........................
 
@@ -574,6 +573,13 @@ Changelog
   `__init__` and validates `weights` in `fit` instead. :pr:`20072` by
   :user:`Juan Carlos Alfaro Jiménez <alfaro96>`.
 
+:mod:`sklearn.neural_network`
+.............................
+
+- |Fix| :class:`neural_network.MLPClassifier` and
+  :class:`neural_network.MLPRegressor` now correctly support continued training
+  when loading from a pickled file. :pr:`19631` by `Thomas Fan`_.
+
 :mod:`sklearn.pipeline`
 .......................
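
In user code, the behaviour this changelog entry restores looks roughly like the sketch below; the toy data, hyperparameters, and file name are illustrative and not taken from the commit. Before this fix, calling partial_fit on the reloaded estimator could leave the weights effectively unchanged.

    import joblib
    from sklearn.neural_network import MLPRegressor

    # Fit a small model, persist it with joblib, then resume training.
    est = MLPRegressor(hidden_layer_sizes=(10,), random_state=0, max_iter=200)
    est.fit([[2.0]], [4.0])

    joblib.dump(est, "mlp.pkl")
    loaded = joblib.load("mlp.pkl")

    # With the fix, this continues to update the reloaded model's weights.
    loaded.partial_fit([[2.0]], [1.0])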

sklearn/neural_network/_multilayer_perceptron.py

Lines changed: 2 additions & 3 deletions

@@ -557,9 +557,8 @@ def _fit_stochastic(
         incremental,
     ):
 
+        params = self.coefs_ + self.intercepts_
         if not incremental or not hasattr(self, "_optimizer"):
-            params = self.coefs_ + self.intercepts_
-
             if self.solver == "sgd":
                 self._optimizer = SGDOptimizer(
                     params,
@@ -642,7 +641,7 @@ def _fit_stochastic(
 
                 # update weights
                 grads = coef_grads + intercept_grads
-                self._optimizer.update_params(grads)
+                self._optimizer.update_params(params, grads)
 
             self.n_iter_ += 1
             self.loss_ = accumulated_loss / X.shape[0]
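
The change rebinds `params = self.coefs_ + self.intercepts_` on every call and threads it through to the optimizer, instead of relying on a parameter list captured when the optimizer was first created. A minimal NumPy sketch of the aliasing pitfall this avoids (the array round trip below merely stands in for a save/load cycle and is not the commit's code):

    import numpy as np

    coefs = [np.zeros(3)]
    held = list(coefs)         # an optimizer-style copy of the references
    held[0] += 1.0             # in-place update still reaches the model's array
    assert coefs[0][0] == 1.0

    # If the arrays are recreated, as can happen across a save/load round trip,
    # the held references go stale and updates stop reaching the model:
    coefs = [np.array(c) for c in coefs]
    held[0] += 1.0
    assert coefs[0][0] == 1.0  # unchanged: the update went to the old array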

sklearn/neural_network/_stochastic_optimizers.py

Lines changed: 10 additions & 11 deletions

@@ -12,10 +12,6 @@ class BaseOptimizer:
 
     Parameters
     ----------
-    params : list, length = len(coefs_) + len(intercepts_)
-        The concatenated list containing coefs_ and intercepts_ in MLP model.
-        Used for initializing velocities and updating params
-
     learning_rate_init : float, default=0.1
         The initial learning rate used. It controls the step-size in updating
         the weights
@@ -26,22 +22,25 @@ class BaseOptimizer:
         the current learning rate
     """
 
-    def __init__(self, params, learning_rate_init=0.1):
-        self.params = [param for param in params]
+    def __init__(self, learning_rate_init=0.1):
         self.learning_rate_init = learning_rate_init
         self.learning_rate = float(learning_rate_init)
 
-    def update_params(self, grads):
+    def update_params(self, params, grads):
         """Update parameters with given gradients
 
         Parameters
         ----------
-        grads : list, length = len(params)
+        params : list of length = len(coefs_) + len(intercepts_)
+            The concatenated list containing coefs_ and intercepts_ in MLP
+            model. Used for initializing velocities and updating params
+
+        grads : list of length = len(params)
             Containing gradients with respect to coefs_ and intercepts_ in MLP
             model. So length should be aligned with params
         """
         updates = self._get_updates(grads)
-        for param, update in zip(self.params, updates):
+        for param, update in zip((p for p in params), updates):
             param += update
 
     def iteration_ends(self, time_step):
@@ -128,7 +127,7 @@ def __init__(
         nesterov=True,
         power_t=0.5,
     ):
-        super().__init__(params, learning_rate_init)
+        super().__init__(learning_rate_init)
 
         self.lr_schedule = lr_schedule
         self.momentum = momentum
@@ -246,7 +245,7 @@ class AdamOptimizer(BaseOptimizer):
     def __init__(
         self, params, learning_rate_init=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8
     ):
-        super().__init__(params, learning_rate_init)
+        super().__init__(learning_rate_init)
 
         self.beta_1 = beta_1
         self.beta_2 = beta_2
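
With this change the base optimizer no longer owns the parameter list; it keeps only its internal state (velocities for SGD, first and second moments for Adam) and receives the current parameters on each update. A minimal sketch of the new calling convention, using the private module shown above (illustrative shapes and values, not a public API):

    import numpy as np
    from sklearn.neural_network._stochastic_optimizers import SGDOptimizer

    # Parameters live with the caller; the optimizer only tracks its own state
    # (here, velocities) and is handed the current params on every update.
    params = [np.zeros((2, 2)), np.zeros(2)]
    grads = [np.ones((2, 2)), np.ones(2)]

    opt = SGDOptimizer(params, learning_rate_init=0.1, momentum=0, nesterov=False)
    opt.update_params(params, grads)   # new signature: (params, grads)

    # Plain SGD step without momentum: param <- param - lr * grad
    assert np.allclose(params[0], -0.1)
    assert np.allclose(params[1], -0.1)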

sklearn/neural_network/tests/test_mlp.py

Lines changed: 28 additions & 0 deletions

@@ -11,6 +11,7 @@
 import re
 
 import numpy as np
+import joblib
 
 from numpy.testing import (
     assert_almost_equal,
@@ -869,3 +870,30 @@ def test_mlp_param_dtypes(dtype, Estimator):
 
     if Estimator == MLPRegressor:
         assert pred.dtype == dtype
+
+
+def test_mlp_loading_from_joblib_partial_fit(tmp_path):
+    """Loading an MLP from disk and partial fitting updates weights.
+    Non-regression test for #19626."""
+    pre_trained_estimator = MLPRegressor(
+        hidden_layer_sizes=(42,), random_state=42, learning_rate_init=0.01, max_iter=200
+    )
+    features, target = [[2]], [4]
+
+    # Fit on x=2, y=4
+    pre_trained_estimator.fit(features, target)
+
+    # dump and load model
+    pickled_file = tmp_path / "mlp.pkl"
+    joblib.dump(pre_trained_estimator, pickled_file)
+    load_estimator = joblib.load(pickled_file)
+
+    # Train for more epochs on the point x=2, y=1
+    fine_tune_features, fine_tune_target = [[2]], [1]
+
+    for _ in range(200):
+        load_estimator.partial_fit(fine_tune_features, fine_tune_target)
+
+    # the fine-tuned model learned the new target
+    predicted_value = load_estimator.predict(fine_tune_features)
+    assert_allclose(predicted_value, fine_tune_target, rtol=1e-4)

sklearn/neural_network/tests/test_stochastic_optimizers.py

Lines changed: 9 additions & 11 deletions

@@ -12,10 +12,8 @@
 
 
 def test_base_optimizer():
-    params = [np.zeros(shape) for shape in shapes]
-
     for lr in [10 ** i for i in range(-3, 4)]:
-        optimizer = BaseOptimizer(params, lr)
+        optimizer = BaseOptimizer(lr)
         assert optimizer.trigger_stopping("", False)
 
 
@@ -27,9 +25,9 @@ def test_sgd_optimizer_no_momentum():
     optimizer = SGDOptimizer(params, lr, momentum=0, nesterov=False)
     grads = [rng.random_sample(shape) for shape in shapes]
     expected = [param - lr * grad for param, grad in zip(params, grads)]
-    optimizer.update_params(grads)
+    optimizer.update_params(params, grads)
 
-    for exp, param in zip(expected, optimizer.params):
+    for exp, param in zip(expected, params):
         assert_array_equal(exp, param)
 
 
@@ -47,9 +45,9 @@ def test_sgd_optimizer_momentum():
         momentum * velocity - lr * grad for velocity, grad in zip(velocities, grads)
     ]
     expected = [param + update for param, update in zip(params, updates)]
-    optimizer.update_params(grads)
+    optimizer.update_params(params, grads)
 
-    for exp, param in zip(expected, optimizer.params):
+    for exp, param in zip(expected, params):
         assert_array_equal(exp, param)
 
 
@@ -79,9 +77,9 @@ def test_sgd_optimizer_nesterovs_momentum():
         momentum * update - lr * grad for update, grad in zip(updates, grads)
     ]
     expected = [param + update for param, update in zip(params, updates)]
-    optimizer.update_params(grads)
+    optimizer.update_params(params, grads)
 
-    for exp, param in zip(expected, optimizer.params):
+    for exp, param in zip(expected, params):
         assert_array_equal(exp, param)
 
 
@@ -110,6 +108,6 @@ def test_adam_optimizer():
     ]
     expected = [param + update for param, update in zip(params, updates)]
 
-    optimizer.update_params(grads)
-    for exp, param in zip(expected, optimizer.params):
+    optimizer.update_params(params, grads)
+    for exp, param in zip(expected, params):
         assert_array_equal(exp, param)
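
For reference, the momentum tests updated above check the classical (non-Nesterov) momentum recurrence. A tiny worked example with made-up numbers, not taken from the test file:

    import numpy as np

    # velocity <- momentum * velocity - lr * grad
    # param    <- param + velocity
    lr, momentum = 0.1, 0.9
    param = np.array([1.0])
    velocity = np.array([0.0])
    grad = np.array([0.5])

    velocity = momentum * velocity - lr * grad   # -> [-0.05]
    param = param + velocity                     # -> [0.95]
    print(param)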

0 commit comments

Comments
 (0)
0