From ce065bad325aa4cf322f618af83e0d4071087ea7 Mon Sep 17 00:00:00 2001 From: jeremie du boisberranger Date: Sun, 17 Jan 2021 17:31:24 +0100 Subject: [PATCH 1/9] several fixes to dict update in dict learning --- sklearn/decomposition/_dict_learning.py | 133 ++++++++++-------------- 1 file changed, 57 insertions(+), 76 deletions(-) diff --git a/sklearn/decomposition/_dict_learning.py b/sklearn/decomposition/_dict_learning.py index 046738aa9700d..e323d9a5f2c84 100644 --- a/sklearn/decomposition/_dict_learning.py +++ b/sklearn/decomposition/_dict_learning.py @@ -354,28 +354,30 @@ def sparse_encode(X, dictionary, *, gram=None, cov=None, return code -def _update_dict(dictionary, Y, code, verbose=False, return_r2=False, - random_state=None, positive=False): +def _update_dict(dictionary, Y, code, A=None, B=None, verbose=False, + return_r2=False, random_state=None, positive=False): """Update the dense dictionary factor in place. Parameters ---------- - dictionary : ndarray of shape (n_features, n_components) + dictionary : ndarray of shape (n_components, n_features) Value of the dictionary at the previous iteration. - Y : ndarray of shape (n_features, n_samples) + Y : ndarray of shape (n_samples, n_features) Data matrix. - code : ndarray of shape (n_components, n_samples) + code : ndarray of shape (n_samples, n_components) Sparse coding of the data against which to optimize the dictionary. + A : ndarray of shape (n_components, n_components), default=None + With B, sufficient stats of the online model. + + B : ndarray of shape (n_features, n_components), default=None + With A, sufficient stats of the online model. + verbose: bool, default=False Degree of output the procedure will print. - return_r2 : bool, default=False - Whether to compute and return the residual sum of squares corresponding - to the computed solution. - random_state : int, RandomState instance or None, default=None Used for randomly initializing the dictionary. Pass an int for reproducible results across multiple function calls. @@ -385,54 +387,37 @@ def _update_dict(dictionary, Y, code, verbose=False, return_r2=False, Whether to enforce positivity when finding the dictionary. .. versionadded:: 0.20 - - Returns - ------- - dictionary : ndarray of shape (n_features, n_components) - Updated dictionary. 
""" - n_components = len(code) - n_features = Y.shape[0] + n_samples, n_components = code.shape random_state = check_random_state(random_state) - # Get BLAS functions - gemm, = linalg.get_blas_funcs(('gemm',), (dictionary, code, Y)) - ger, = linalg.get_blas_funcs(('ger',), (dictionary, code)) - nrm2, = linalg.get_blas_funcs(('nrm2',), (dictionary,)) - # Residuals, computed with BLAS for speed and efficiency - # R <- -1.0 * U * V^T + 1.0 * Y - # Outputs R as Fortran array for efficiency - R = gemm(-1.0, dictionary, code, 1.0, Y) + + if A is None: + A = np.dot(code.T, code) + if B is None: + B = np.dot(Y.T, code) + for k in range(n_components): - # R <- 1.0 * U_k * V_k^T + R - R = ger(1.0, dictionary[:, k], code[k, :], a=R, overwrite_a=True) - dictionary[:, k] = np.dot(R, code[k, :]) - if positive: - np.clip(dictionary[:, k], 0, None, out=dictionary[:, k]) - # Scale k'th atom - # (U_k * U_k) ** 0.5 - atom_norm = nrm2(dictionary[:, k]) - if atom_norm < 1e-10: + if A[k, k] > 1e-6: + dictionary[k] += (B[:, k] - A[k].dot(dictionary)) / A[k, k] + else: + # kth atom is almost never used -> sample a new one from the data if verbose == 1: sys.stdout.write("+") sys.stdout.flush() elif verbose: print("Adding new random atom") - dictionary[:, k] = random_state.randn(n_features) - if positive: - np.clip(dictionary[:, k], 0, None, out=dictionary[:, k]) - # Setting corresponding coefs to 0 - code[k, :] = 0.0 - # (U_k * U_k) ** 0.5 - atom_norm = nrm2(dictionary[:, k]) - dictionary[:, k] /= atom_norm - else: - dictionary[:, k] /= atom_norm - # R <- -1.0 * U_k * V_k^T + R - R = ger(-1.0, dictionary[:, k], code[k, :], a=R, overwrite_a=True) - if return_r2: - R = nrm2(R) ** 2.0 - return dictionary, R - return dictionary + + newd = Y[random_state.choice(n_samples)] + # add small noise to avoid making the sparse coding ill conditioned + noise = random_state.normal(0, 0.01 * newd.std(), size=len(newd)) + dictionary[k] = newd + noise + code[:, k] = 0 + + if positive: + np.clip(dictionary[k], 0, None, out=dictionary[k]) + + # Projection on the constraint ||V_k|| == 1 + dictionary[k] /= linalg.norm(dictionary[k]) @_deprecate_positional_args @@ -574,11 +559,10 @@ def dict_learning(X, n_components, *, alpha, max_iter=100, tol=1e-8, dictionary = np.r_[dictionary, np.zeros((n_components - r, dictionary.shape[1]))] - # Fortran-order dict, as we are going to access its row vectors + # Fortran-order dict better suited for the sparse coding which is the + # bottleneck of this algorithm. 
dictionary = np.array(dictionary, order='F') - residuals = 0 - errors = [] current_cost = np.nan @@ -602,15 +586,14 @@ def dict_learning(X, n_components, *, alpha, max_iter=100, tol=1e-8, code = sparse_encode(X, dictionary, algorithm=method, alpha=alpha, init=code, n_jobs=n_jobs, positive=positive_code, max_iter=method_max_iter, verbose=verbose) - # Update dictionary - dictionary, residuals = _update_dict(dictionary.T, X.T, code.T, - verbose=verbose, return_r2=True, - random_state=random_state, - positive=positive_dict) - dictionary = dictionary.T + + # Update dictionary in place + _update_dict(dictionary, X, code, verbose=verbose, + random_state=random_state, positive=positive_dict) # Cost function - current_cost = 0.5 * residuals + alpha * np.sum(np.abs(code)) + current_cost = (0.5 * np.sum((X - code @ dictionary)**2) + + alpha * np.sum(np.abs(code))) errors.append(current_cost) if ii > 0: @@ -802,7 +785,7 @@ def dict_learning_online(X, n_components=2, *, alpha=1, n_iter=100, else: X_train = X - dictionary = check_array(dictionary.T, order='F', dtype=np.float64, + dictionary = check_array(dictionary, order='F', dtype=np.float64, copy=False) dictionary = np.require(dictionary, requirements='W') @@ -834,11 +817,11 @@ def dict_learning_online(X, n_components=2, *, alpha=1, n_iter=100, print("Iteration % 3i (elapsed time: % 3is, % 4.1fmn)" % (ii, dt, dt / 60)) - this_code = sparse_encode(this_X, dictionary.T, algorithm=method, + this_code = sparse_encode(this_X, dictionary, algorithm=method, alpha=alpha, n_jobs=n_jobs, check_input=False, positive=positive_code, - max_iter=method_max_iter, verbose=verbose).T + max_iter=method_max_iter, verbose=verbose) # Update the auxiliary variables if ii < batch_size - 1: @@ -848,15 +831,13 @@ def dict_learning_online(X, n_components=2, *, alpha=1, n_iter=100, beta = (theta + 1 - batch_size) / (theta + 1) A *= beta - A += np.dot(this_code, this_code.T) + A += np.dot(this_code.T, this_code) B *= beta - B += np.dot(this_X.T, this_code.T) + B += np.dot(this_X.T, this_code) - # Update dictionary - dictionary = _update_dict(dictionary, B, A, verbose=verbose, - random_state=random_state, - positive=positive_dict) - # XXX: Can the residuals be of any use? 
+ # Update dictionary in place + _update_dict(dictionary, this_X, this_code, A, B, verbose=verbose, + random_state=random_state, positive=positive_dict) # Maybe we need a stopping criteria based on the amount of # modification in the dictionary @@ -865,15 +846,15 @@ def dict_learning_online(X, n_components=2, *, alpha=1, n_iter=100, if return_inner_stats: if return_n_iter: - return dictionary.T, (A, B), ii - iter_offset + 1 + return dictionary, (A, B), ii - iter_offset + 1 else: - return dictionary.T, (A, B) + return dictionary, (A, B) if return_code: if verbose > 1: print('Learning code...', end=' ') elif verbose == 1: print('|', end=' ') - code = sparse_encode(X, dictionary.T, algorithm=method, alpha=alpha, + code = sparse_encode(X, dictionary, algorithm=method, alpha=alpha, n_jobs=n_jobs, check_input=False, positive=positive_code, max_iter=method_max_iter, verbose=verbose) @@ -881,14 +862,14 @@ def dict_learning_online(X, n_components=2, *, alpha=1, n_iter=100, dt = (time.time() - t0) print('done (total time: % 3is, % 4.1fmn)' % (dt, dt / 60)) if return_n_iter: - return code, dictionary.T, ii - iter_offset + 1 + return code, dictionary, ii - iter_offset + 1 else: - return code, dictionary.T + return code, dictionary if return_n_iter: - return dictionary.T, ii - iter_offset + 1 + return dictionary, ii - iter_offset + 1 else: - return dictionary.T + return dictionary class _BaseSparseCoding(TransformerMixin): From d78859bdb522340c4941ca8f2d49f0ddba7affeb Mon Sep 17 00:00:00 2001 From: jeremie du boisberranger Date: Mon, 18 Jan 2021 18:09:02 +0100 Subject: [PATCH 2/9] fix docstrings --- sklearn/decomposition/_dict_learning.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/decomposition/_dict_learning.py b/sklearn/decomposition/_dict_learning.py index e323d9a5f2c84..395f3295863c7 100644 --- a/sklearn/decomposition/_dict_learning.py +++ b/sklearn/decomposition/_dict_learning.py @@ -1250,7 +1250,7 @@ class DictionaryLearning(_BaseSparseCoding, BaseEstimator): We can check the level of sparsity of `X_transformed`: >>> np.mean(X_transformed == 0) - 0.88... + 0.87... We can compare the average squared euclidean norm of the reconstruction error of the sparse coded signal relative to the squared euclidean norm of @@ -1258,7 +1258,7 @@ class DictionaryLearning(_BaseSparseCoding, BaseEstimator): >>> X_hat = X_transformed @ dict_learner.components_ >>> np.mean(np.sum((X_hat - X) ** 2, axis=1) / np.sum(X ** 2, axis=1)) - 0.07... + 0.08... Notes ----- @@ -1490,7 +1490,7 @@ class MiniBatchDictionaryLearning(_BaseSparseCoding, BaseEstimator): We can check the level of sparsity of `X_transformed`: >>> np.mean(X_transformed == 0) - 0.87... + 0.86... We can compare the average squared euclidean norm of the reconstruction error of the sparse coded signal relative to the squared euclidean norm of @@ -1498,7 +1498,7 @@ class MiniBatchDictionaryLearning(_BaseSparseCoding, BaseEstimator): >>> X_hat = X_transformed @ dict_learner.components_ >>> np.mean(np.sum((X_hat - X) ** 2, axis=1) / np.sum(X ** 2, axis=1)) - 0.10... + 0.07... 
Notes ----- From 66553aab86f3695c3dafb362ce1cc8a57fcc5064 Mon Sep 17 00:00:00 2001 From: jeremie du boisberranger Date: Mon, 18 Jan 2021 18:31:47 +0100 Subject: [PATCH 3/9] avoid noise with 0 std --- sklearn/decomposition/_dict_learning.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sklearn/decomposition/_dict_learning.py b/sklearn/decomposition/_dict_learning.py index 395f3295863c7..7225b122103b5 100644 --- a/sklearn/decomposition/_dict_learning.py +++ b/sklearn/decomposition/_dict_learning.py @@ -408,8 +408,11 @@ def _update_dict(dictionary, Y, code, A=None, B=None, verbose=False, print("Adding new random atom") newd = Y[random_state.choice(n_samples)] + # add small noise to avoid making the sparse coding ill conditioned - noise = random_state.normal(0, 0.01 * newd.std(), size=len(newd)) + noise_level = 0.01 * (newd.std() or 1) # avoid 0 std + noise = random_state.normal(0, noise_level, size=len(newd)) + dictionary[k] = newd + noise code[:, k] = 0 From 2885c21158abadb0dd9fb102a511086898d8e524 Mon Sep 17 00:00:00 2001 From: jeremie du boisberranger Date: Tue, 19 Jan 2021 14:36:14 +0100 Subject: [PATCH 4/9] add test for dict update --- .../decomposition/tests/test_dict_learning.py | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/sklearn/decomposition/tests/test_dict_learning.py b/sklearn/decomposition/tests/test_dict_learning.py index c9590f3136678..31139dec9093a 100644 --- a/sklearn/decomposition/tests/test_dict_learning.py +++ b/sklearn/decomposition/tests/test_dict_learning.py @@ -10,6 +10,7 @@ from sklearn.utils import check_array +from sklearn.utils._testing import assert_allclose from sklearn.utils._testing import assert_array_almost_equal from sklearn.utils._testing import assert_array_equal from sklearn.utils._testing import ignore_warnings @@ -25,6 +26,8 @@ from sklearn.utils.estimator_checks import check_transformer_general from sklearn.utils.estimator_checks import check_transformers_unfitted +from sklearn.decomposition._dict_learning import _update_dict + rng_global = np.random.RandomState(0) n_samples, n_features = 10, 8 @@ -573,3 +576,32 @@ def test_sparse_coder_n_features_in(): d = np.array([[1, 2, 3], [1, 2, 3]]) sc = SparseCoder(d) assert sc.n_features_in_ == d.shape[1] + + +def test_update_dict(): + # Check that the dict update is correct in batch and online mode + code = np.array([[0.5, -0.5], + [0.1, 0.9]]) + dictionary = np.array([[1., 0.], + [0.6, 0.8]]) + + # Create X s.t. the dictionary is already optimal + X = np.dot(code, dictionary) + + # Since ||d_k|| = 1, we expect the update to not change the dictionary. 
+ expected = dictionary.copy() + + # batch mode + new_dict = dictionary.copy() + _update_dict(new_dict, X, code) + + assert_allclose(new_dict, expected) + + # online mode + A = np.dot(code.T, code) + B = np.dot(X.T, code) + + new_dict = dictionary.copy() + _update_dict(new_dict, X, code, A, B) + + assert_allclose(new_dict, expected) From bd30598e6aba1a8c8611678181603e41555bfb89 Mon Sep 17 00:00:00 2001 From: jeremie du boisberranger Date: Tue, 19 Jan 2021 16:53:08 +0100 Subject: [PATCH 5/9] better test --- .../decomposition/tests/test_dict_learning.py | 26 ++++++++----------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/sklearn/decomposition/tests/test_dict_learning.py b/sklearn/decomposition/tests/test_dict_learning.py index 31139dec9093a..14e1524f00802 100644 --- a/sklearn/decomposition/tests/test_dict_learning.py +++ b/sklearn/decomposition/tests/test_dict_learning.py @@ -579,29 +579,25 @@ def test_sparse_coder_n_features_in(): def test_update_dict(): - # Check that the dict update is correct in batch and online mode + # Check the dict update in batch mode vs online mode + rng = np.random.RandomState(0) + code = np.array([[0.5, -0.5], [0.1, 0.9]]) dictionary = np.array([[1., 0.], [0.6, 0.8]]) - # Create X s.t. the dictionary is already optimal - X = np.dot(code, dictionary) - - # Since ||d_k|| = 1, we expect the update to not change the dictionary. - expected = dictionary.copy() - - # batch mode - new_dict = dictionary.copy() - _update_dict(new_dict, X, code) + X = np.dot(code, dictionary) + rng.randn(2, 2) - assert_allclose(new_dict, expected) + # batch update + newd_batch = dictionary.copy() + _update_dict(newd_batch, X, code) - # online mode + # online update A = np.dot(code.T, code) B = np.dot(X.T, code) - new_dict = dictionary.copy() - _update_dict(new_dict, X, code, A, B) + newd_online = dictionary.copy() + _update_dict(newd_online, X, code, A, B) - assert_allclose(new_dict, expected) + assert_allclose(newd_batch, newd_online) From 7cb5e6894efce05411238ddf4d39c6cb92e5448a Mon Sep 17 00:00:00 2001 From: jeremie du boisberranger Date: Tue, 19 Jan 2021 18:01:19 +0100 Subject: [PATCH 6/9] fix constraint on the atom norms for dict learning --- doc/modules/decomposition.rst | 4 ++-- sklearn/decomposition/_dict_learning.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/modules/decomposition.rst b/doc/modules/decomposition.rst index 7e8e79d9d8bdd..12632d9e8845c 100644 --- a/doc/modules/decomposition.rst +++ b/doc/modules/decomposition.rst @@ -247,7 +247,7 @@ problem solved is a PCA problem (dictionary learning) with an .. math:: (U^*, V^*) = \underset{U, V}{\operatorname{arg\,min\,}} & \frac{1}{2} ||X-UV||_2^2+\alpha||V||_1 \\ - \text{subject to } & ||U_k||_2 = 1 \text{ for all } + \text{subject to } & ||U_k||_2 <= 1 \text{ for all } 0 \leq k < n_{components} @@ -433,7 +433,7 @@ dictionary fixed, and then updating the dictionary to best fit the sparse code. .. 
math::
   (U^*, V^*) = \underset{U, V}{\operatorname{arg\,min\,}} & \frac{1}{2}
                ||X-UV||_2^2+\alpha||U||_1 \\
-                \text{subject to } & ||V_k||_2 = 1 \text{ for all }
+                \text{subject to } & ||V_k||_2 <= 1 \text{ for all }
                 0 \leq k < n_{\mathrm{atoms}}
 
diff --git a/sklearn/decomposition/_dict_learning.py b/sklearn/decomposition/_dict_learning.py
index 7225b122103b5..eb4d5b7635da1 100644
--- a/sklearn/decomposition/_dict_learning.py
+++ b/sklearn/decomposition/_dict_learning.py
@@ -419,8 +419,8 @@ def _update_dict(dictionary, Y, code, A=None, B=None, verbose=False,
         if positive:
             np.clip(dictionary[k], 0, None, out=dictionary[k])
 
-        # Projection on the constraint ||V_k|| == 1
-        dictionary[k] /= linalg.norm(dictionary[k])
+        # Projection on the constraint ||V_k|| <= 1
+        dictionary[k] /= max(linalg.norm(dictionary[k]), 1)
 
 
 @_deprecate_positional_args
@@ -1120,7 +1120,7 @@ class DictionaryLearning(_BaseSparseCoding, BaseEstimator):
         (U^*,V^*) = argmin 0.5 || X - U V ||_2^2 + alpha * || U ||_1
                     (U,V)
-                    with || V_k ||_2 = 1 for all 0 <= k < n_components
+                    with || V_k ||_2 <= 1 for all 0 <= k < n_components
 
     Read more in the :ref:`User Guide `.
 
@@ -1352,7 +1352,7 @@ class MiniBatchDictionaryLearning(_BaseSparseCoding, BaseEstimator):
         (U^*,V^*) = argmin 0.5 || X - U V ||_2^2 + alpha * || U ||_1
                     (U,V)
-                    with || V_k ||_2 = 1 for all 0 <= k < n_components
+                    with || V_k ||_2 <= 1 for all 0 <= k < n_components
 
     Read more in the :ref:`User Guide `.
 

From 917e5757e48a2aec57c7c9c856a0f3c3c479b2b6 Mon Sep 17 00:00:00 2001
From: jeremie du boisberranger
Date: Thu, 15 Apr 2021 14:41:44 +0200
Subject: [PATCH 7/9] cln

---
 sklearn/decomposition/_dict_learning.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/decomposition/_dict_learning.py b/sklearn/decomposition/_dict_learning.py
index 4a97b24af5a7f..6eca2f8d6e661 100644
--- a/sklearn/decomposition/_dict_learning.py
+++ b/sklearn/decomposition/_dict_learning.py
@@ -420,8 +420,8 @@ def _update_dict(dictionary, Y, code, A=None, B=None, verbose=False,
         if positive:
             np.clip(dictionary[k], 0, None, out=dictionary[k])
 
-        # Projection on the constraint set ||V_k|| == 1
-        dictionary[k] /= linalg.norm(dictionary[k])
+        # Projection on the constraint set ||V_k|| <= 1
+        dictionary[k] /= max(linalg.norm(dictionary[k]), 1)
 
     if verbose and n_unused > 0:
         print(f"{n_unused} unused atoms resampled.")

From feea61761299ec06b1191e64cf9d80a11df1a6fc Mon Sep 17 00:00:00 2001
From: jeremie du boisberranger
Date: Wed, 27 Oct 2021 15:07:11 +0200
Subject: [PATCH 8/9] what's new

---
 doc/whats_new/v1.1.rst | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst
index 261b6f8eac6bf..fa4690d7b0f22 100644
--- a/doc/whats_new/v1.1.rst
+++ b/doc/whats_new/v1.1.rst
@@ -74,6 +74,12 @@ Changelog
   and :class:`~sklearn.decomposition.TruncatedSVD`.
   :pr:`21334` by `Thomas Fan`_.
 
+- |Fix| Fixed the constraint on the objective function of
+  :class:`decomposition.DictionaryLearning`,
+  :class:`decomposition.MiniBatchDictionaryLearning`, :class:`decomposition.SparsePCA`
+  and :class:`decomposition.MiniBatchSparsePCA` to be convex and match the referenced
+  article. :pr:`19210` by :user:`Jérémie du Boisberranger <jeremiedbb>`.
+
 :mod:`sklearn.impute`
 .....................
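The dictionary update that this series converges on (patches 1, 3, 6 and 7) is the block coordinate descent step of Mairal et al. (2009), "Online Learning for Matrix Factorization and Sparse Coding", followed by a projection onto the unit ball rather than the unit sphere. For reference, here is a minimal NumPy sketch of that step; `update_dict_sketch` is an illustrative name, not scikit-learn API, and it omits the `positive` handling, the verbosity output, and the small resampling noise added in patch 3:

    import numpy as np

    def update_dict_sketch(D, X, code, rng, A=None, B=None):
        # D    : ndarray of shape (n_components, n_features), updated in place
        # X    : ndarray of shape (n_samples, n_features)
        # code : ndarray of shape (n_samples, n_components)
        # A, B : optional precomputed sufficient statistics (online mode)
        n_samples, n_components = code.shape
        if A is None:
            A = code.T @ code   # (n_components, n_components)
        if B is None:
            B = X.T @ code      # (n_features, n_components)
        for k in range(n_components):
            if A[k, k] > 1e-6:
                # exact minimization of the quadratic objective in atom k,
                # all other atoms held fixed
                D[k] += (B[:, k] - A[k] @ D) / A[k, k]
            else:
                # atom k is essentially unused: resample it from the data
                D[k] = X[rng.choice(n_samples)]
                code[:, k] = 0
            # project onto the constraint set ||d_k||_2 <= 1
            D[k] /= max(np.linalg.norm(D[k]), 1)
        return D

Run on the fixture from the new `test_update_dict` above (with `rng = np.random.RandomState(0)`), this should reproduce `newd_batch`: both atoms are used there, so the resampling branch is never taken.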
From 7ac2260c87fe976d15a61a307e5ff17bffe61998 Mon Sep 17 00:00:00 2001
From: jeremie du boisberranger
Date: Wed, 27 Oct 2021 16:31:32 +0200
Subject: [PATCH 9/9] change version

---
 doc/whats_new/v1.0.rst | 20 ++++++++++++++++++++
 doc/whats_new/v1.1.rst |  6 ------
 2 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst
index 17c2b845d40d9..1cc84b74198ce 100644
--- a/doc/whats_new/v1.0.rst
+++ b/doc/whats_new/v1.0.rst
@@ -2,6 +2,26 @@
 
 .. currentmodule:: sklearn
 
+.. _changes_1_0_2:
+
+Version 1.0.2
+=============
+
+**In Development**
+
+Changelog
+---------
+
+:mod:`sklearn.decomposition`
+............................
+
+- |Fix| Fixed the constraint on the objective function of
+  :class:`decomposition.DictionaryLearning`,
+  :class:`decomposition.MiniBatchDictionaryLearning`, :class:`decomposition.SparsePCA`
+  and :class:`decomposition.MiniBatchSparsePCA` to be convex and match the referenced
+  article. :pr:`19210` by :user:`Jérémie du Boisberranger <jeremiedbb>`.
+
+
 .. _changes_1_0_1:
 
 Version 1.0.1
diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst
index fa4690d7b0f22..261b6f8eac6bf 100644
--- a/doc/whats_new/v1.1.rst
+++ b/doc/whats_new/v1.1.rst
@@ -74,12 +74,6 @@ Changelog
   and :class:`~sklearn.decomposition.TruncatedSVD`.
   :pr:`21334` by `Thomas Fan`_.
 
-- |Fix| Fixed the constraint on the objective function of
-  :class:`decomposition.DictionaryLearning`,
-  :class:`decomposition.MiniBatchDictionaryLearning`, :class:`decomposition.SparsePCA`
-  and :class:`decomposition.MiniBatchSparsePCA` to be convex and match the referenced
-  article. :pr:`19210` by :user:`Jérémie du Boisberranger <jeremiedbb>`.
-
 :mod:`sklearn.impute`
 .....................
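As a closing illustration of what relaxing ||V_k||_2 = 1 to ||V_k||_2 <= 1 changes in practice, here is a hypothetical smoke test, assuming a scikit-learn build that includes this series. The projection now maps atoms onto the unit ball instead of the unit sphere, so fitted atoms may end up with norm strictly below 1, whereas the previous update rescaled every atom to norm exactly 1:

    import numpy as np
    from sklearn.decomposition import DictionaryLearning

    rng = np.random.RandomState(42)
    X = rng.randn(100, 8)

    dl = DictionaryLearning(n_components=5, alpha=1.0, random_state=42).fit(X)
    norms = np.linalg.norm(dl.components_, axis=1)

    # Atom norms are bounded by 1 but no longer forced to equal 1.
    assert np.all(norms <= 1 + 1e-12)
    print(norms.round(3))

The exact norms printed depend on the data and the solver; the only guarantee after this fix is the upper bound.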