From 41d0a4018e9e6783d725fc9c128f407425feba5c Mon Sep 17 00:00:00 2001 From: mchikyt3 Date: Mon, 22 May 2023 14:38:48 +0100 Subject: [PATCH 01/29] BUG Fix initializing `precisions_cholesky_` from `precisions_init` --- sklearn/mixture/_gaussian_mixture.py | 18 +++++-- .../mixture/tests/test_gaussian_mixture.py | 50 +++++++++++++++++++ 2 files changed, 65 insertions(+), 3 deletions(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index e0b630f37c163..a3699719b4a1e 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -726,15 +726,27 @@ def _initialize(self, X, resp): covariances, self.covariance_type ) elif self.covariance_type == "full": + n_features = self.precisions_init.shape[-1] + exchange_matrix = np.fliplr(np.eye(n_features)) self.precisions_cholesky_ = np.array( [ - linalg.cholesky(prec_init, lower=True) + exchange_matrix + @ linalg.cholesky( + exchange_matrix @ prec_init @ exchange_matrix, lower=True + ) + @ exchange_matrix for prec_init in self.precisions_init ] ) elif self.covariance_type == "tied": - self.precisions_cholesky_ = linalg.cholesky( - self.precisions_init, lower=True + n_features = self.precisions_init.shape[-1] + exchange_matrix = np.fliplr(np.eye(n_features)) + self.precisions_cholesky_ = ( + exchange_matrix + @ linalg.cholesky( + exchange_matrix @ self.precisions_init @ exchange_matrix, lower=True + ) + @ exchange_matrix ) else: self.precisions_cholesky_ = np.sqrt(self.precisions_init) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index f2d634b3fffe5..cb2e26e99bb9b 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1325,6 +1325,56 @@ def test_gaussian_mixture_precisions_init_diag(): ) +def test_gaussian_mixture_precisions_init(): + def _generate_data(n_samples, n_features, n_components): + """Randomly generate samples and 
responsibilities""" + rs = np.random.RandomState(12345) + X = rs.random_sample((n_samples, n_features)) + resp = rs.random_sample((n_samples, n_components)) + resp /= resp.sum(axis=1)[:, np.newaxis] + return X, resp + + def _calculate_precisions(X, resp, covariance_type): + """Calculate precision matrix and its Cholesky decomposition""" + reg_covar = 1e-6 + weights, means, covariances = _estimate_gaussian_parameters( + X, resp, reg_covar, covariance_type + ) + precisions_cholesky = _compute_precision_cholesky(covariances, covariance_type) + + _, n_components = resp.shape + # Instantiate a `GaussianMixture` model in order to use its + # `_set_parameters` method to compute `precisions_` from + # `precisions_cholesky_` + gmm = GaussianMixture( + n_components=n_components, covariance_type=covariance_type + ) + params = (weights, means, covariances, precisions_cholesky) + # pylint: disable-next=protected-access + gmm._set_parameters(params) + return gmm.precisions_, gmm.precisions_cholesky_ + + X, resp = _generate_data(n_samples=100, n_features=3, n_components=4) + + for covariance_type in ("full", "tied", "diag", "spherical"): + # Arrange + precisions_init, precisions_cholesky = _calculate_precisions( + X, resp, covariance_type + ) + desired_precisions_cholesky = precisions_cholesky + + # Act + gmm = GaussianMixture( + covariance_type=covariance_type, precisions_init=precisions_init + ) + # pylint: disable-next=protected-access + gmm._initialize(X, resp) + actual_precisions_cholesky = gmm.precisions_cholesky_ + + # Assert + assert_allclose(actual_precisions_cholesky, desired_precisions_cholesky) + + def test_gaussian_mixture_single_component_stable(): """ Non-regression test for #23032 ensuring 1-component GM works on only a From 61e5249380df1a09ee631e08d07e74f13dfcfabe Mon Sep 17 00:00:00 2001 From: mchikyt3 Date: Tue, 23 May 2023 13:26:22 +0100 Subject: [PATCH 02/29] Added change log. 
--- doc/whats_new/v1.3.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/whats_new/v1.3.rst b/doc/whats_new/v1.3.rst index a4cc3fa690474..b3d759852816f 100644 --- a/doc/whats_new/v1.3.rst +++ b/doc/whats_new/v1.3.rst @@ -51,6 +51,12 @@ random sampling procedures. used each time the kernel is called. :pr:`26337` by :user:`Yao Xiao `. +- |Fix| Initializing :class:`mixture.GaussianMixture` from user-provided + `precisions_init` for `covariance_type` of `full` or `tied` was not correct, and has + been fixed. Changes have been made in the `_initialize` method of + :class:`mixture.GaussianMixture`. + :pr:`26416` by :user:`Yang Tao `. + Changes impacting all modules ----------------------------- From 806c7e6c17c5fa7a222b4208c46d2b7829e19a56 Mon Sep 17 00:00:00 2001 From: Yang Tao <56955779+mchikyt3@users.noreply.github.com> Date: Thu, 1 Jun 2023 19:43:33 +0100 Subject: [PATCH 03/29] Update sklearn/mixture/tests/test_gaussian_mixture.py Co-authored-by: Julien Jerphanion --- sklearn/mixture/tests/test_gaussian_mixture.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index cb2e26e99bb9b..e6fa5614a92c9 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1358,10 +1358,9 @@ def _calculate_precisions(X, resp, covariance_type): for covariance_type in ("full", "tied", "diag", "spherical"): # Arrange - precisions_init, precisions_cholesky = _calculate_precisions( + precisions_init, desired_precisions_cholesky = _calculate_precisions( X, resp, covariance_type ) - desired_precisions_cholesky = precisions_cholesky # Act gmm = GaussianMixture( From 3f816ed177f63a2bfc33f4f463531a4344a6a6a9 Mon Sep 17 00:00:00 2001 From: Yang Tao <56955779+mchikyt3@users.noreply.github.com> Date: Thu, 1 Jun 2023 19:43:50 +0100 Subject: [PATCH 04/29] Update sklearn/mixture/tests/test_gaussian_mixture.py 
Co-authored-by: Julien Jerphanion --- sklearn/mixture/tests/test_gaussian_mixture.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index e6fa5614a92c9..30f2aa82bca67 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1350,7 +1350,6 @@ def _calculate_precisions(X, resp, covariance_type): n_components=n_components, covariance_type=covariance_type ) params = (weights, means, covariances, precisions_cholesky) - # pylint: disable-next=protected-access gmm._set_parameters(params) return gmm.precisions_, gmm.precisions_cholesky_ From 17acaf4cdea13ee71a66dd4f3a0aa5912b0bba4c Mon Sep 17 00:00:00 2001 From: Yang Tao <56955779+mchikyt3@users.noreply.github.com> Date: Thu, 1 Jun 2023 19:44:14 +0100 Subject: [PATCH 05/29] Update sklearn/mixture/tests/test_gaussian_mixture.py Co-authored-by: Julien Jerphanion --- sklearn/mixture/tests/test_gaussian_mixture.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 30f2aa82bca67..e05777c6d467a 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1344,8 +1344,9 @@ def _calculate_precisions(X, resp, covariance_type): _, n_components = resp.shape # Instantiate a `GaussianMixture` model in order to use its - # `_set_parameters` method to compute `precisions_` from - # `precisions_cholesky_` + # `_set_parameters` method to return the `precisions_` and + # `precisions_cholesky_` from matching the `covariance_type` + # provided. 
gmm = GaussianMixture( n_components=n_components, covariance_type=covariance_type ) From d172fc7726d11e8347ecaf5c5c7f968355a4c13b Mon Sep 17 00:00:00 2001 From: Yang Tao <56955779+mchikyt3@users.noreply.github.com> Date: Thu, 1 Jun 2023 19:44:44 +0100 Subject: [PATCH 06/29] Update sklearn/mixture/tests/test_gaussian_mixture.py Co-authored-by: Julien Jerphanion --- sklearn/mixture/tests/test_gaussian_mixture.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index e05777c6d467a..ee429027c60c3 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1335,7 +1335,9 @@ def _generate_data(n_samples, n_features, n_components): return X, resp def _calculate_precisions(X, resp, covariance_type): - """Calculate precision matrix and its Cholesky decomposition""" + """Calculate precision matrix of X and its Cholesky decomposition + for the given covariance type. 
+ """ reg_covar = 1e-6 weights, means, covariances = _estimate_gaussian_parameters( X, resp, reg_covar, covariance_type From 4258ee640c7f88c14ac73f4cb01f6ae31e6cad6b Mon Sep 17 00:00:00 2001 From: Yang Tao <56955779+mchikyt3@users.noreply.github.com> Date: Thu, 1 Jun 2023 19:44:58 +0100 Subject: [PATCH 07/29] Update sklearn/mixture/tests/test_gaussian_mixture.py Co-authored-by: Julien Jerphanion --- sklearn/mixture/tests/test_gaussian_mixture.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index ee429027c60c3..ca92ed311d3e7 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1326,6 +1326,7 @@ def test_gaussian_mixture_precisions_init_diag(): def test_gaussian_mixture_precisions_init(): + """Non-regression test for #26415.""" def _generate_data(n_samples, n_features, n_components): """Randomly generate samples and responsibilities""" rs = np.random.RandomState(12345) From d1a770557702ade9c1e80f135eee3ee6f9d12401 Mon Sep 17 00:00:00 2001 From: mchikyt3 Date: Thu, 1 Jun 2023 23:21:33 +0100 Subject: [PATCH 08/29] Used `np.flipud` and `np.fliplr` to permutate precision array. Parameterized `COVARIANCE_TYPE`. 
--- sklearn/mixture/_gaussian_mixture.py | 22 ++++++++----------- .../mixture/tests/test_gaussian_mixture.py | 8 +------ 2 files changed, 10 insertions(+), 20 deletions(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index a3699719b4a1e..e67af5e021471 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -726,27 +726,23 @@ def _initialize(self, X, resp): covariances, self.covariance_type ) elif self.covariance_type == "full": - n_features = self.precisions_init.shape[-1] - exchange_matrix = np.fliplr(np.eye(n_features)) self.precisions_cholesky_ = np.array( [ - exchange_matrix - @ linalg.cholesky( - exchange_matrix @ prec_init @ exchange_matrix, lower=True + np.flipud( + np.fliplr( + linalg.cholesky(np.flipud(np.fliplr(prec_init)), lower=True) + ) ) - @ exchange_matrix for prec_init in self.precisions_init ] ) elif self.covariance_type == "tied": - n_features = self.precisions_init.shape[-1] - exchange_matrix = np.fliplr(np.eye(n_features)) - self.precisions_cholesky_ = ( - exchange_matrix - @ linalg.cholesky( - exchange_matrix @ self.precisions_init @ exchange_matrix, lower=True + self.precisions_cholesky_ = np.flipud( + np.fliplr( + linalg.cholesky( + np.flipud(np.fliplr(self.precisions_init)), lower=True + ) ) - @ exchange_matrix ) else: self.precisions_cholesky_ = np.sqrt(self.precisions_init) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index ca92ed311d3e7..03dafa4d6e327 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1359,21 +1359,15 @@ def _calculate_precisions(X, resp, covariance_type): X, resp = _generate_data(n_samples=100, n_features=3, n_components=4) - for covariance_type in ("full", "tied", "diag", "spherical"): - # Arrange + for covariance_type in COVARIANCE_TYPE: precisions_init, desired_precisions_cholesky = _calculate_precisions( X, 
resp, covariance_type ) - - # Act gmm = GaussianMixture( covariance_type=covariance_type, precisions_init=precisions_init ) - # pylint: disable-next=protected-access gmm._initialize(X, resp) actual_precisions_cholesky = gmm.precisions_cholesky_ - - # Assert assert_allclose(actual_precisions_cholesky, desired_precisions_cholesky) From 49e5e6c2e5618f3bad54bd3ea7d4275116af4587 Mon Sep 17 00:00:00 2001 From: Yang Tao <56955779+mchikyt3@users.noreply.github.com> Date: Thu, 1 Jun 2023 23:29:29 +0100 Subject: [PATCH 09/29] Update doc/whats_new/v1.3.rst Co-authored-by: Julien Jerphanion --- doc/whats_new/v1.3.rst | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/doc/whats_new/v1.3.rst b/doc/whats_new/v1.3.rst index 30061e1e1623e..feb981e580b93 100644 --- a/doc/whats_new/v1.3.rst +++ b/doc/whats_new/v1.3.rst @@ -51,10 +51,9 @@ random sampling procedures. used each time the kernel is called. :pr:`26337` by :user:`Yao Xiao `. -- |Fix| Initializing :class:`mixture.GaussianMixture` from user-provided - `precisions_init` for `covariance_type` of `full` or `tied` was not correct, and has - been fixed. Changes have been made in the `_initialize` method of - :class:`mixture.GaussianMixture`. +- |Fix| The initialization :class:`mixture.GaussianMixture` from user-provided + `precisions_init` for `covariance_type` of `full` or `tied` was not correct, + and has been fixed. :pr:`26416` by :user:`Yang Tao `. Changes impacting all modules From d3b9fd7d69205f0932bad5a4739498a0dd07df8a Mon Sep 17 00:00:00 2001 From: mchikyt3 Date: Thu, 1 Jun 2023 23:40:13 +0100 Subject: [PATCH 10/29] Typo fixed. --- doc/whats_new/v1.3.rst | 2 +- sklearn/mixture/tests/test_gaussian_mixture.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/whats_new/v1.3.rst b/doc/whats_new/v1.3.rst index feb981e580b93..30c9468136519 100644 --- a/doc/whats_new/v1.3.rst +++ b/doc/whats_new/v1.3.rst @@ -51,7 +51,7 @@ random sampling procedures. used each time the kernel is called. 
:pr:`26337` by :user:`Yao Xiao `. -- |Fix| The initialization :class:`mixture.GaussianMixture` from user-provided +- |Fix| The initialization of :class:`mixture.GaussianMixture` from user-provided `precisions_init` for `covariance_type` of `full` or `tied` was not correct, and has been fixed. :pr:`26416` by :user:`Yang Tao `. diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 03dafa4d6e327..5cee0c7a92a82 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1327,6 +1327,7 @@ def test_gaussian_mixture_precisions_init_diag(): def test_gaussian_mixture_precisions_init(): """Non-regression test for #26415.""" + def _generate_data(n_samples, n_features, n_components): """Randomly generate samples and responsibilities""" rs = np.random.RandomState(12345) From d75c111dea5f057cc63a0e7b01bc247ba7088cc7 Mon Sep 17 00:00:00 2001 From: Yang Tao <56955779+mchikyt3@users.noreply.github.com> Date: Fri, 2 Jun 2023 10:45:33 +0100 Subject: [PATCH 11/29] Update sklearn/mixture/tests/test_gaussian_mixture.py Co-authored-by: Julien Jerphanion --- sklearn/mixture/tests/test_gaussian_mixture.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 5cee0c7a92a82..e82cfd26ea837 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1325,7 +1325,8 @@ def test_gaussian_mixture_precisions_init_diag(): ) -def test_gaussian_mixture_precisions_init(): +@pytest.mark.parametrize("covariance_type", COVARIANCE_TYPE) +def test_gaussian_mixture_precisions_init(covariance_type): """Non-regression test for #26415.""" def _generate_data(n_samples, n_features, n_components): From 224fe65fc391ae27717bd15b2724cf9bbccab5ec Mon Sep 17 00:00:00 2001 From: Yang Tao <56955779+mchikyt3@users.noreply.github.com> 
Date: Fri, 2 Jun 2023 10:45:46 +0100 Subject: [PATCH 12/29] Update sklearn/mixture/tests/test_gaussian_mixture.py Co-authored-by: Julien Jerphanion --- .../mixture/tests/test_gaussian_mixture.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index e82cfd26ea837..e26a42e0f49ab 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1361,16 +1361,15 @@ def _calculate_precisions(X, resp, covariance_type): X, resp = _generate_data(n_samples=100, n_features=3, n_components=4) - for covariance_type in COVARIANCE_TYPE: - precisions_init, desired_precisions_cholesky = _calculate_precisions( - X, resp, covariance_type - ) - gmm = GaussianMixture( - covariance_type=covariance_type, precisions_init=precisions_init - ) - gmm._initialize(X, resp) - actual_precisions_cholesky = gmm.precisions_cholesky_ - assert_allclose(actual_precisions_cholesky, desired_precisions_cholesky) + precisions_init, desired_precisions_cholesky = _calculate_precisions( + X, resp, covariance_type + ) + gmm = GaussianMixture( + covariance_type=covariance_type, precisions_init=precisions_init + ) + gmm._initialize(X, resp) + actual_precisions_cholesky = gmm.precisions_cholesky_ + assert_allclose(actual_precisions_cholesky, desired_precisions_cholesky) def test_gaussian_mixture_single_component_stable(): From eba683ed7fe403b6463a3327f766a0b112f6ceea Mon Sep 17 00:00:00 2001 From: Yang Tao <56955779+mchikyt3@users.noreply.github.com> Date: Fri, 2 Jun 2023 11:01:39 +0100 Subject: [PATCH 13/29] Update doc/whats_new/v1.3.rst Co-authored-by: Julien Jerphanion --- doc/whats_new/v1.3.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v1.3.rst b/doc/whats_new/v1.3.rst index 30c9468136519..0347636ca0aac 100644 --- a/doc/whats_new/v1.3.rst +++ b/doc/whats_new/v1.3.rst @@ -51,7 +51,7 @@ 
random sampling procedures. used each time the kernel is called. :pr:`26337` by :user:`Yao Xiao `. -- |Fix| The initialization of :class:`mixture.GaussianMixture` from user-provided +- |FIX| The initialization of :class:`mixture.GaussianMixture` from user-provided `precisions_init` for `covariance_type` of `full` or `tied` was not correct, and has been fixed. :pr:`26416` by :user:`Yang Tao `. From 5bd554a9447cbf133fe59d046d685f49feb411dd Mon Sep 17 00:00:00 2001 From: Tao Yang SLUK Date: Fri, 2 Jun 2023 11:03:31 +0100 Subject: [PATCH 14/29] Moved two helper functions to the file scope, and added a `seed` parameter to `_generate_data`. --- .../mixture/tests/test_gaussian_mixture.py | 62 +++++++++---------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index e26a42e0f49ab..ec9c5264e072a 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1325,41 +1325,41 @@ def test_gaussian_mixture_precisions_init_diag(): ) +def _generate_data(seed, n_samples, n_features, n_components): + """Randomly generate samples and responsibilities.""" + rs = np.random.RandomState(seed) + X = rs.random_sample((n_samples, n_features)) + resp = rs.random_sample((n_samples, n_components)) + resp /= resp.sum(axis=1)[:, np.newaxis] + return X, resp + + +def _calculate_precisions(X, resp, covariance_type): + """Calculate precision matrix of X and its Cholesky decomposition + for the given covariance type. 
+ """ + reg_covar = 1e-6 + weights, means, covariances = _estimate_gaussian_parameters( + X, resp, reg_covar, covariance_type + ) + precisions_cholesky = _compute_precision_cholesky(covariances, covariance_type) + + _, n_components = resp.shape + # Instantiate a `GaussianMixture` model in order to use its + # `_set_parameters` method to return the `precisions_` and + # `precisions_cholesky_` from matching the `covariance_type` + # provided. + gmm = GaussianMixture(n_components=n_components, covariance_type=covariance_type) + params = (weights, means, covariances, precisions_cholesky) + gmm._set_parameters(params) + return gmm.precisions_, gmm.precisions_cholesky_ + + @pytest.mark.parametrize("covariance_type", COVARIANCE_TYPE) def test_gaussian_mixture_precisions_init(covariance_type): """Non-regression test for #26415.""" - def _generate_data(n_samples, n_features, n_components): - """Randomly generate samples and responsibilities""" - rs = np.random.RandomState(12345) - X = rs.random_sample((n_samples, n_features)) - resp = rs.random_sample((n_samples, n_components)) - resp /= resp.sum(axis=1)[:, np.newaxis] - return X, resp - - def _calculate_precisions(X, resp, covariance_type): - """Calculate precision matrix of X and its Cholesky decomposition - for the given covariance type. - """ - reg_covar = 1e-6 - weights, means, covariances = _estimate_gaussian_parameters( - X, resp, reg_covar, covariance_type - ) - precisions_cholesky = _compute_precision_cholesky(covariances, covariance_type) - - _, n_components = resp.shape - # Instantiate a `GaussianMixture` model in order to use its - # `_set_parameters` method to return the `precisions_` and - # `precisions_cholesky_` from matching the `covariance_type` - # provided. 
- gmm = GaussianMixture( - n_components=n_components, covariance_type=covariance_type - ) - params = (weights, means, covariances, precisions_cholesky) - gmm._set_parameters(params) - return gmm.precisions_, gmm.precisions_cholesky_ - - X, resp = _generate_data(n_samples=100, n_features=3, n_components=4) + X, resp = _generate_data(seed=12345, n_samples=100, n_features=3, n_components=4) precisions_init, desired_precisions_cholesky = _calculate_precisions( X, resp, covariance_type From bdf1277b73e456d7c8b6207f1f7c0b22e7240b26 Mon Sep 17 00:00:00 2001 From: Tao Yang SLUK Date: Fri, 2 Jun 2023 15:44:24 +0100 Subject: [PATCH 15/29] Refactored the permutation of matrix and computation of `precision_cholesky` into two functions. --- sklearn/mixture/_gaussian_mixture.py | 77 ++++++++++++++++++++-------- 1 file changed, 57 insertions(+), 20 deletions(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index e67af5e021471..6f34cb71d7bf0 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -350,6 +350,60 @@ def _compute_precision_cholesky(covariances, covariance_type): return precisions_chol +def _flipudlr(array): + """Reverse the rows and columns of an array.""" + return np.flipud(np.fliplr(array)) + + +def _compute_precision_cholesky_from_precision(precisions, covariance_type): + """Compute the Cholesky decomposition of the precisions using precisions. + + As implemented in :func:`_compute_precision_cholesky`, the `precisions_cholesky_` is + an upper-triangular matrix for each Gaussian components, which can expressed as the + $UU^T$ factorization of the precision matrix for each Gaussian component, where $U$ + is an upper-triangular matrix. 
+ + In order to use the Cholesky decomposition to get $UU^T$, the precision matrix + $\Lambda$ needs to be permutated such that its rows and columns are reversed, which + can be done by a similarity transformation with an exchange matrix $J$, where the 1 + elements reside on the anti-diagonal and all other elements are 0. In particular, + the Cholesky decomposition of the transformed precision matrix is $J\Lambda J=LL^T$, + where $L$ is a lower-triangular matrix. Because $\Lambda=UU^T$ and $J=J^{-1}=J^T$, + the `precisions_cholesky_` for each Gaussian components can be expressed as $JLJ$. + + Refer to #26415 for more information. + + Parameters + ---------- + precisions : array-like + The precision matrix of the current components. + The shape depends of the covariance_type. + + covariance_type : {'full', 'tied', 'diag', 'spherical'} + The type of precision matrices. + + Returns + ------- + precisions_cholesky : array-like + The cholesky decomposition of sample precisions of the current + components. The shape depends of the covariance_type. 
+ """ + if covariance_type == "full": + precisions_cholesky = np.array( + [ + _flipudlr(linalg.cholesky(_flipudlr(precision), lower=True)) + for precision in precisions + ] + ) + elif covariance_type == "tied": + precisions_cholesky = _flipudlr( + linalg.cholesky(_flipudlr(precisions), lower=True) + ) + else: + precisions_cholesky = np.sqrt(precisions) + return precisions_cholesky + + ############################################################################### # Gaussian mixture probability estimators def _compute_log_det_cholesky(matrix_chol, covariance_type, n_features): @@ -725,27 +779,10 @@ def _initialize(self, X, resp): self.precisions_cholesky_ = _compute_precision_cholesky( covariances, self.covariance_type ) - elif self.covariance_type == "full": - self.precisions_cholesky_ = np.array( - [ - np.flipud( - np.fliplr( - linalg.cholesky(np.flipud(np.fliplr(prec_init)), lower=True) - ) - ) - for prec_init in self.precisions_init - ] - ) - elif self.covariance_type == "tied": - self.precisions_cholesky_ = np.flipud( - np.fliplr( - linalg.cholesky( - np.flipud(np.fliplr(self.precisions_init)), lower=True - ) - ) - ) else: - self.precisions_cholesky_ = np.sqrt(self.precisions_init) + self.precisions_cholesky_ = _compute_precision_cholesky_from_precision( + self.precisions_init, self.covariance_type + ) def _m_step(self, X, log_resp): """M step. From c2ab97904e4e658221dd76121c4497e74efd64a9 Mon Sep 17 00:00:00 2001 From: Tao Yang SLUK Date: Fri, 2 Jun 2023 15:54:49 +0100 Subject: [PATCH 16/29] Typo fixed. 
--- sklearn/mixture/_gaussian_mixture.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 6f34cb71d7bf0..aac69d83872f6 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -359,9 +359,9 @@ def _compute_precision_cholesky_from_precision(precisions, covariance_type): """Compute the Cholesky decomposition of the precisions using precisions. As implemented in :func:`_compute_precision_cholesky`, the `precisions_cholesky_` is - an upper-triangular matrix for each Gaussian components, which can expressed as the - $UU^T$ factorization of the precision matrix for each Gaussian component, where $U$ - is an upper-triangular matrix. + an upper-triangular matrix for each Gaussian components, which can be expressed as + the $UU^T$ factorization of the precision matrix for each Gaussian component, where + $U$ is an upper-triangular matrix. In order to use the Cholesky decomposition to get $UU^T$, the precision matrix $\Lambda$ needs to be permutated such that its rows and columns are reversed, which From 0e2c14f684dae76d6e5553dc65d1bfd5d0415d9d Mon Sep 17 00:00:00 2001 From: Tao Yang SLUK Date: Fri, 2 Jun 2023 20:45:02 +0100 Subject: [PATCH 17/29] Typo fixed. --- sklearn/mixture/_gaussian_mixture.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index aac69d83872f6..b7342ae74861c 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -356,7 +356,7 @@ def _flipudlr(array): def _compute_precision_cholesky_from_precision(precisions, covariance_type): - """Compute the Cholesky decomposition of the precisions using precisions. + r"""Compute the Cholesky decomposition of the precisions using precisions. 
As implemented in :func:`_compute_precision_cholesky`, the `precisions_cholesky_` is an upper-triangular matrix for each Gaussian components, which can be expressed as @@ -371,7 +371,7 @@ def _compute_precision_cholesky_from_precision(precisions, covariance_type): where $L$ is a lower-triangular matrix. Because $\Lambda=UU^T$ and $J=J^{-1}=J^T$, the `precisions_cholesky_` for each Gaussian components can be expressed as $JLJ$. - Refer to #26415 for more information. + Refer to #26415 for details. Parameters ---------- From a090dc3a87bd62932d2dcdf63fd21fee54fa6026 Mon Sep 17 00:00:00 2001 From: Yang Tao <56955779+mchikyt3@users.noreply.github.com> Date: Sat, 3 Jun 2023 10:26:31 +0100 Subject: [PATCH 18/29] Update doc/whats_new/v1.3.rst Co-authored-by: Julien Jerphanion --- doc/whats_new/v1.3.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v1.3.rst b/doc/whats_new/v1.3.rst index 0f674abb30370..414fd90565dfe 100644 --- a/doc/whats_new/v1.3.rst +++ b/doc/whats_new/v1.3.rst @@ -51,7 +51,7 @@ random sampling procedures. used each time the kernel is called. :pr:`26337` by :user:`Yao Xiao `. -- |FIX| The initialization of :class:`mixture.GaussianMixture` from user-provided +- |Fix| The initialization of :class:`mixture.GaussianMixture` from user-provided `precisions_init` for `covariance_type` of `full` or `tied` was not correct, and has been fixed. :pr:`26416` by :user:`Yang Tao `. From 8d9f5fe3287432f7d13ae1796e76acbe895bbbef Mon Sep 17 00:00:00 2001 From: mchikyt3 Date: Tue, 11 Jul 2023 15:47:24 +0100 Subject: [PATCH 19/29] Added change log. --- doc/whats_new/v1.4.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst index 4ba357c52d136..61df2297ace1c 100644 --- a/doc/whats_new/v1.4.rst +++ b/doc/whats_new/v1.4.rst @@ -19,6 +19,11 @@ parameters, may produce different models from the previous version. 
This often occurs due to changes in the modelling logic (bug fixes or enhancements), or in random sampling procedures. +- |Fix| The initialization of :class:`mixture.GaussianMixture` from user-provided + `precisions_init` for `covariance_type` of `full` or `tied` was not correct, + and has been fixed. + :pr:`26416` by :user:`Yang Tao `. + Changes impacting all modules ----------------------------- From a8a2ab74dd4a564d0219835f1a9b7e70d06c0a4b Mon Sep 17 00:00:00 2001 From: Yang Tao <56955779+mchikyt3@users.noreply.github.com> Date: Wed, 19 Jul 2023 19:56:18 +0100 Subject: [PATCH 20/29] Update sklearn/mixture/_gaussian_mixture.py Co-authored-by: Omar Salman --- sklearn/mixture/_gaussian_mixture.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 75fa8aff58196..738c6669ab198 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -375,7 +375,7 @@ def _compute_precision_cholesky_from_precision(precisions, covariance_type): ---------- precisions : array-like The precision matrix of the current components. - The shape depends of the covariance_type. + The shape depends on the covariance_type. covariance_type : {'full', 'tied', 'diag', 'spherical'} The type of precision matrices. 
From ead1671a79ffd20372e45e284378575499b424b8 Mon Sep 17 00:00:00 2001 From: Yang Tao <56955779+mchikyt3@users.noreply.github.com> Date: Wed, 19 Jul 2023 19:56:31 +0100 Subject: [PATCH 21/29] Update sklearn/mixture/_gaussian_mixture.py Co-authored-by: Omar Salman --- sklearn/mixture/_gaussian_mixture.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 738c6669ab198..7f16250fc0cc4 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -354,7 +354,7 @@ def _flipudlr(array): def _compute_precision_cholesky_from_precision(precisions, covariance_type): - r"""Compute the Cholesky decomposition of the precisions using precisions. + r"""Compute the Cholesky decomposition of the specified precisions using the precisions themselves. As implemented in :func:`_compute_precision_cholesky`, the `precisions_cholesky_` is an upper-triangular matrix for each Gaussian components, which can be expressed as From 1e1f8d1e78132964eab72775109b5dc5020f365c Mon Sep 17 00:00:00 2001 From: Yang Tao <56955779+mchikyt3@users.noreply.github.com> Date: Wed, 19 Jul 2023 19:56:38 +0100 Subject: [PATCH 22/29] Update sklearn/mixture/_gaussian_mixture.py Co-authored-by: Omar Salman --- sklearn/mixture/_gaussian_mixture.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 7f16250fc0cc4..91988935e1945 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -357,7 +357,7 @@ def _compute_precision_cholesky_from_precision(precisions, covariance_type): r"""Compute the Cholesky decomposition of the specified precisions using the precisions themselves. 
As implemented in :func:`_compute_precision_cholesky`, the `precisions_cholesky_` is - an upper-triangular matrix for each Gaussian components, which can be expressed as + an upper-triangular matrix for each Gaussian component, which can be expressed as the $UU^T$ factorization of the precision matrix for each Gaussian component, where $U$ is an upper-triangular matrix. From 8d38aab62c5b5657ce7d7d824767f98143656579 Mon Sep 17 00:00:00 2001 From: Yang Tao <56955779+mchikyt3@users.noreply.github.com> Date: Wed, 19 Jul 2023 19:56:46 +0100 Subject: [PATCH 23/29] Update sklearn/mixture/_gaussian_mixture.py Co-authored-by: Omar Salman --- sklearn/mixture/_gaussian_mixture.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 91988935e1945..eaaea3a17aa8a 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -363,7 +363,7 @@ def _compute_precision_cholesky_from_precision(precisions, covariance_type): In order to use the Cholesky decomposition to get $UU^T$, the precision matrix $\Lambda$ needs to be permutated such that its rows and columns are reversed, which - can be done by a similarity transformation with an exchange matrix $J$, where the 1 + can be done by applying a similarity transformation with an exchange matrix $J$, where the 1 elements reside on the anti-diagonal and all other elements are 0. In particular, the Cholesky decomposition of the transformed precision matrix is $J\Lambda J=LL^T$, where $L$ is a lower-triangular matrix. 
Because $\Lambda=UU^T$ and $J=J^{-1}=J^T$, From e8cbb9830211568b40ef08605ac7cd851e614241 Mon Sep 17 00:00:00 2001 From: Yang Tao <56955779+mchikyt3@users.noreply.github.com> Date: Wed, 19 Jul 2023 19:56:52 +0100 Subject: [PATCH 24/29] Update sklearn/mixture/_gaussian_mixture.py Co-authored-by: Omar Salman --- sklearn/mixture/_gaussian_mixture.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index eaaea3a17aa8a..b044124ba5a97 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -384,7 +384,7 @@ def _compute_precision_cholesky_from_precision(precisions, covariance_type): ------- precisions_cholesky : array-like The cholesky decomposition of sample precisions of the current - components. The shape depends of the covariance_type. + components. The shape depends on the covariance_type. """ if covariance_type == "full": precisions_cholesky = np.array( From f841414ae8796ff3ae10c6d8200e085b15e87959 Mon Sep 17 00:00:00 2001 From: Yang Tao <56955779+mchikyt3@users.noreply.github.com> Date: Wed, 19 Jul 2023 19:56:58 +0100 Subject: [PATCH 25/29] Update sklearn/mixture/_gaussian_mixture.py Co-authored-by: Omar Salman --- sklearn/mixture/_gaussian_mixture.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index b044124ba5a97..aeb850cbec5e6 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -778,7 +778,7 @@ def _initialize(self, X, resp): covariances, self.covariance_type ) else: - self.precisions_cholesky_ = _compute_precision_cholesky_from_precision( + self.precisions_cholesky_ = _compute_precision_cholesky_from_precisions( self.precisions_init, self.covariance_type ) From 4841f905f1d2e627db1e38fc231f6e0d08ede30d Mon Sep 17 00:00:00 2001 From: Yang Tao <56955779+mchikyt3@users.noreply.github.com> Date: 
Wed, 19 Jul 2023 19:57:07 +0100 Subject: [PATCH 26/29] Update sklearn/mixture/_gaussian_mixture.py Co-authored-by: Omar Salman --- sklearn/mixture/_gaussian_mixture.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index aeb850cbec5e6..bd3e121b3d4c9 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -353,7 +353,7 @@ def _flipudlr(array): return np.flipud(np.fliplr(array)) -def _compute_precision_cholesky_from_precision(precisions, covariance_type): +def _compute_precision_cholesky_from_precisions(precisions, covariance_type): r"""Compute the Cholesky decomposition of the specified precisions using the precisions themselves. As implemented in :func:`_compute_precision_cholesky`, the `precisions_cholesky_` is From 521d4df904e545a21ba8f1290f0472099b21d3c7 Mon Sep 17 00:00:00 2001 From: Yang Tao <56955779+mchikyt3@users.noreply.github.com> Date: Wed, 19 Jul 2023 19:57:13 +0100 Subject: [PATCH 27/29] Update sklearn/mixture/_gaussian_mixture.py Co-authored-by: Omar Salman --- sklearn/mixture/_gaussian_mixture.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index bd3e121b3d4c9..25ee65b177cc2 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -367,7 +367,7 @@ def _compute_precision_cholesky_from_precisions(precisions, covariance_type): elements reside on the anti-diagonal and all other elements are 0. In particular, the Cholesky decomposition of the transformed precision matrix is $J\Lambda J=LL^T$, where $L$ is a lower-triangular matrix. Because $\Lambda=UU^T$ and $J=J^{-1}=J^T$, - the `precisions_cholesky_` for each Gaussian components can be expressed as $JLJ$. + the `precisions_cholesky_` for each Gaussian component can be expressed as $JLJ$. Refer to #26415 for details. 
From e07130de85436346bf7526a273eabcb6b25afefe Mon Sep 17 00:00:00 2001 From: Tao Yang SLUK Date: Wed, 19 Jul 2023 20:10:05 +0100 Subject: [PATCH 28/29] Fixed long lines in the docstring. --- sklearn/mixture/_gaussian_mixture.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 25ee65b177cc2..9fcc791032c48 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -354,7 +354,7 @@ def _flipudlr(array): def _compute_precision_cholesky_from_precisions(precisions, covariance_type): - r"""Compute the Cholesky decomposition of the specified precisions using the precisions themselves. + r"""Compute the Cholesky decomposition of precisions using precisions themselves. As implemented in :func:`_compute_precision_cholesky`, the `precisions_cholesky_` is an upper-triangular matrix for each Gaussian component, which can be expressed as @@ -363,11 +363,12 @@ def _compute_precision_cholesky_from_precisions(precisions, covariance_type): In order to use the Cholesky decomposition to get $UU^T$, the precision matrix $\Lambda$ needs to be permutated such that its rows and columns are reversed, which - can be done by applying a similarity transformation with an exchange matrix $J$, where the 1 - elements reside on the anti-diagonal and all other elements are 0. In particular, - the Cholesky decomposition of the transformed precision matrix is $J\Lambda J=LL^T$, - where $L$ is a lower-triangular matrix. Because $\Lambda=UU^T$ and $J=J^{-1}=J^T$, - the `precisions_cholesky_` for each Gaussian component can be expressed as $JLJ$. + can be done by applying a similarity transformation with an exchange matrix $J$, + where the 1 elements reside on the anti-diagonal and all other elements are 0. In + particular, the Cholesky decomposition of the transformed precision matrix is + $J\Lambda J=LL^T$, where $L$ is a lower-triangular matrix. 
Because $\Lambda=UU^T$ + and $J=J^{-1}=J^T$, the `precisions_cholesky_` for each Gaussian component can be + expressed as $JLJ$. Refer to #26415 for details. From 36615aef6bffbf62b4b1649e18c591563725ce87 Mon Sep 17 00:00:00 2001 From: Yang Tao <56955779+mchikyt3@users.noreply.github.com> Date: Thu, 20 Jul 2023 10:26:51 +0100 Subject: [PATCH 29/29] Update sklearn/mixture/tests/test_gaussian_mixture.py Co-authored-by: Omar Salman --- sklearn/mixture/tests/test_gaussian_mixture.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index 7b162a4f3844e..2f39033faed6b 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -1357,10 +1357,15 @@ def _calculate_precisions(X, resp, covariance_type): @pytest.mark.parametrize("covariance_type", COVARIANCE_TYPE) -def test_gaussian_mixture_precisions_init(covariance_type): +def test_gaussian_mixture_precisions_init(covariance_type, global_random_seed): """Non-regression test for #26415.""" - X, resp = _generate_data(seed=12345, n_samples=100, n_features=3, n_components=4) + X, resp = _generate_data( + seed=global_random_seed, + n_samples=100, + n_features=3, + n_components=4, + ) precisions_init, desired_precisions_cholesky = _calculate_precisions( X, resp, covariance_type