From db95a3631618e7ea0384ee7cb3315de3725cb70a Mon Sep 17 00:00:00 2001
From: Thomas J Fan
Date: Mon, 21 Oct 2019 11:38:59 -0400
Subject: [PATCH 1/3] API Deprecated paths in manifold

---
 sklearn/_build_utils/deprecated_modules.py    |   8 +
 sklearn/decomposition/_base.py                | 159 ++++++++++++++++++
 sklearn/decomposition/_online_lda_fast.pyx    | 113 +++++++++++++
 sklearn/manifold/__init__.py                  |  10 +-
 sklearn/manifold/{isomap.py => _isomap.py}    |   0
 .../{locally_linear.py => _locally_linear.py} |   0
 sklearn/manifold/{mds.py => _mds.py}          |   0
 ..._embedding_.py => _spectral_embedding_.py} |   0
 sklearn/manifold/{t_sne.py => _t_sne.py}      |   0
 sklearn/manifold/tests/test_locally_linear.py |   2 +-
 .../manifold/tests/test_spectral_embedding.py |   6 +-
 sklearn/manifold/tests/test_t_sne.py          |  14 +-
 12 files changed, 296 insertions(+), 16 deletions(-)
 create mode 100644 sklearn/decomposition/_base.py
 create mode 100644 sklearn/decomposition/_online_lda_fast.pyx
 rename sklearn/manifold/{isomap.py => _isomap.py} (100%)
 rename sklearn/manifold/{locally_linear.py => _locally_linear.py} (100%)
 rename sklearn/manifold/{mds.py => _mds.py} (100%)
 rename sklearn/manifold/{spectral_embedding_.py => _spectral_embedding_.py} (100%)
 rename sklearn/manifold/{t_sne.py => _t_sne.py} (100%)

diff --git a/sklearn/_build_utils/deprecated_modules.py b/sklearn/_build_utils/deprecated_modules.py
index 6f2a7d4b329eb..4a36aa5929720 100644
--- a/sklearn/_build_utils/deprecated_modules.py
+++ b/sklearn/_build_utils/deprecated_modules.py
@@ -85,6 +85,14 @@
     ('_libsvm_sparse', 'sklearn.svm.libsvm_sparse', 'sklearn.svm',
      'set_verbosity_wrap'),
     ('_liblinear', 'sklearn.svm.liblinear', 'sklearn.svm', 'train_wrap'),
+
+    ('_isomap', 'sklearn.manifold.isomap', 'sklearn.manifold', 'Isomap'),
+    ('_locally_linear', 'sklearn.manifold.locally_linear', 'sklearn.manifold',
+     'LocallyLinearEmbedding'),
+    ('_mds', 'sklearn.manifold.mds', 'sklearn.manifold', 'MDS'),
+    ('_spectral_embedding_', 'sklearn.manifold.spectral_embedding_',
+     'sklearn.manifold', 'SpectralEmbedding'),
+    ('_t_sne', 'sklearn.manifold.t_sne', 'sklearn.manifold', 'TSNE')
 ]
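Note on the table above: each entry maps a new private module to its deprecated
public path, the import location users should switch to, and one attribute name
(inferred from context to be a representative symbol used to check that the
module resolves). A minimal sketch of how an entry can be exercised, assuming
scikit-learn 0.22 where this layout exists:

    import importlib

    # One row of the table above, unpacked. The meaning of the fourth field
    # is an inference: a representative attribute of the module.
    entry = ('_t_sne', 'sklearn.manifold.t_sne', 'sklearn.manifold', 'TSNE')
    private_mod, deprecated_path, public_path, attr = entry

    # The public name is reachable from the supported import location ...
    assert hasattr(importlib.import_module(public_path), attr)
    # ... and from the new private module that now hosts the implementation.
    assert hasattr(importlib.import_module('sklearn.manifold.' + private_mod),
                   attr)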
diff --git a/sklearn/decomposition/_base.py b/sklearn/decomposition/_base.py
new file mode 100644
index 0000000000000..e89a05051404b
--- /dev/null
+++ b/sklearn/decomposition/_base.py
@@ -0,0 +1,159 @@
+"""Principal Component Analysis Base Classes"""
+
+# Author: Alexandre Gramfort
+#         Olivier Grisel
+#         Mathieu Blondel
+#         Denis A. Engemann
+#         Kyle Kastner
+#
+# License: BSD 3 clause
+
+import numpy as np
+from scipy import linalg
+
+from ..base import BaseEstimator, TransformerMixin
+from ..utils import check_array
+from ..utils.validation import check_is_fitted
+from abc import ABCMeta, abstractmethod
+
+
+class _BasePCA(TransformerMixin, BaseEstimator, metaclass=ABCMeta):
+    """Base class for PCA methods.
+
+    Warning: This class should not be used directly.
+    Use derived classes instead.
+    """
+    def get_covariance(self):
+        """Compute data covariance with the generative model.
+
+        ``cov = components_.T * S**2 * components_ + sigma2 * eye(n_features)``
+        where S**2 contains the explained variances, and sigma2 contains the
+        noise variances.
+
+        Returns
+        -------
+        cov : array, shape=(n_features, n_features)
+            Estimated covariance of data.
+        """
+        components_ = self.components_
+        exp_var = self.explained_variance_
+        if self.whiten:
+            components_ = components_ * np.sqrt(exp_var[:, np.newaxis])
+        exp_var_diff = np.maximum(exp_var - self.noise_variance_, 0.)
+        cov = np.dot(components_.T * exp_var_diff, components_)
+        cov.flat[::len(cov) + 1] += self.noise_variance_  # modify diag inplace
+        return cov
+
+    def get_precision(self):
+        """Compute data precision matrix with the generative model.
+
+        Equals the inverse of the covariance but computed with
+        the matrix inversion lemma for efficiency.
+
+        Returns
+        -------
+        precision : array, shape=(n_features, n_features)
+            Estimated precision of data.
+        """
+        n_features = self.components_.shape[1]
+
+        # handle corner cases first
+        if self.n_components_ == 0:
+            return np.eye(n_features) / self.noise_variance_
+        if self.n_components_ == n_features:
+            return linalg.inv(self.get_covariance())
+
+        # Get precision using matrix inversion lemma
+        components_ = self.components_
+        exp_var = self.explained_variance_
+        if self.whiten:
+            components_ = components_ * np.sqrt(exp_var[:, np.newaxis])
+        exp_var_diff = np.maximum(exp_var - self.noise_variance_, 0.)
+        precision = np.dot(components_, components_.T) / self.noise_variance_
+        precision.flat[::len(precision) + 1] += 1. / exp_var_diff
+        precision = np.dot(components_.T,
+                           np.dot(linalg.inv(precision), components_))
+        precision /= -(self.noise_variance_ ** 2)
+        precision.flat[::len(precision) + 1] += 1. / self.noise_variance_
+        return precision
+
+    @abstractmethod
+    def fit(self, X, y=None):
+        """Placeholder for fit. Subclasses should implement this method!
+
+        Fit the model with X.
+
+        Parameters
+        ----------
+        X : array-like, shape (n_samples, n_features)
+            Training data, where n_samples is the number of samples and
+            n_features is the number of features.
+
+        Returns
+        -------
+        self : object
+            Returns the instance itself.
+        """
+
+    def transform(self, X):
+        """Apply dimensionality reduction to X.
+
+        X is projected on the first principal components previously extracted
+        from a training set.
+
+        Parameters
+        ----------
+        X : array-like, shape (n_samples, n_features)
+            New data, where n_samples is the number of samples
+            and n_features is the number of features.
+
+        Returns
+        -------
+        X_new : array-like, shape (n_samples, n_components)
+
+        Examples
+        --------
+
+        >>> import numpy as np
+        >>> from sklearn.decomposition import IncrementalPCA
+        >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
+        >>> ipca = IncrementalPCA(n_components=2, batch_size=3)
+        >>> ipca.fit(X)
+        IncrementalPCA(batch_size=3, n_components=2)
+        >>> ipca.transform(X)  # doctest: +SKIP
+        """
+        check_is_fitted(self)
+
+        X = check_array(X)
+        if self.mean_ is not None:
+            X = X - self.mean_
+        X_transformed = np.dot(X, self.components_.T)
+        if self.whiten:
+            X_transformed /= np.sqrt(self.explained_variance_)
+        return X_transformed
+
+    def inverse_transform(self, X):
+        """Transform data back to its original space.
+
+        In other words, return an input X_original whose transform would be X.
+
+        Parameters
+        ----------
+        X : array-like, shape (n_samples, n_components)
+            New data, where n_samples is the number of samples
+            and n_components is the number of components.
+
+        Returns
+        -------
+        X_original : array-like, shape (n_samples, n_features)
+
+        Notes
+        -----
+        If whitening is enabled, inverse_transform will compute the
+        exact inverse operation, which includes reversing whitening.
+        """
+        if self.whiten:
+            return np.dot(X, np.sqrt(self.explained_variance_[:, np.newaxis]) *
+                          self.components_) + self.mean_
+        else:
+            return np.dot(X, self.components_) + self.mean_
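Two claims in the file above are easy to spot-check through PCA, a _BasePCA
subclass: get_precision must agree with directly inverting get_covariance (it
differs only in using the matrix inversion lemma), and with all components
retained, transform followed by inverse_transform is exact even when whitening.
A quick sketch, assuming a scikit-learn version exposing these public methods:

    import numpy as np
    from scipy import linalg
    from sklearn.decomposition import PCA

    rng = np.random.RandomState(0)
    X = rng.randn(50, 6)

    # Truncated, whitened fit: the lemma-based precision must match a
    # direct inverse of the model covariance.
    pca = PCA(n_components=3, whiten=True).fit(X)
    assert np.allclose(pca.get_precision(), linalg.inv(pca.get_covariance()))

    # Full-rank fit: whitening is exactly reversed by inverse_transform,
    # as the Notes section above states.
    pca_full = PCA(n_components=6, whiten=True).fit(X)
    assert np.allclose(pca_full.inverse_transform(pca_full.transform(X)), X)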
+ """ + if self.whiten: + return np.dot(X, np.sqrt(self.explained_variance_[:, np.newaxis]) * + self.components_) + self.mean_ + else: + return np.dot(X, self.components_) + self.mean_ diff --git a/sklearn/decomposition/_online_lda_fast.pyx b/sklearn/decomposition/_online_lda_fast.pyx new file mode 100644 index 0000000000000..1c00af02d2375 --- /dev/null +++ b/sklearn/decomposition/_online_lda_fast.pyx @@ -0,0 +1,113 @@ +# +# cython: boundscheck=False, wraparound=False + +cimport cython +cimport numpy as np +import numpy as np + +np.import_array() + +from libc.math cimport exp, fabs, log +from numpy.math cimport EULER + + +def mean_change(np.ndarray[ndim=1, dtype=np.float64_t] arr_1, + np.ndarray[ndim=1, dtype=np.float64_t] arr_2): + """Calculate the mean difference between two arrays. + + Equivalent to np.abs(arr_1 - arr2).mean(). + """ + + cdef np.float64_t total, diff + cdef np.npy_intp i, size + + size = arr_1.shape[0] + total = 0.0 + for i in range(size): + diff = fabs(arr_1[i] - arr_2[i]) + total += diff + + return total / size + + +def _dirichlet_expectation_1d(np.ndarray[ndim=1, dtype=np.float64_t] doc_topic, + double doc_topic_prior, + np.ndarray[ndim=1, dtype=np.float64_t] out): + """Dirichlet expectation for a single sample: + exp(E[log(theta)]) for theta ~ Dir(doc_topic) + after adding doc_topic_prior to doc_topic, in-place. + + Equivalent to + doc_topic += doc_topic_prior + out[:] = np.exp(psi(doc_topic) - psi(np.sum(doc_topic))) + """ + + cdef np.float64_t dt, psi_total, total + cdef np.npy_intp i, size + + size = doc_topic.shape[0] + + total = 0.0 + for i in range(size): + dt = doc_topic[i] + doc_topic_prior + doc_topic[i] = dt + total += dt + psi_total = psi(total) + + for i in range(size): + out[i] = exp(psi(doc_topic[i]) - psi_total) + + +def _dirichlet_expectation_2d(np.ndarray[ndim=2, dtype=np.float64_t] arr): + """Dirichlet expectation for multiple samples: + E[log(theta)] for theta ~ Dir(arr). + + Equivalent to psi(arr) - psi(np.sum(arr, axis=1))[:, np.newaxis]. + + Note that unlike _dirichlet_expectation_1d, this function doesn't compute + the exp and doesn't add in the prior. + """ + cdef np.float64_t row_total, psi_row_total + cdef np.ndarray[ndim=2, dtype=np.float64_t] d_exp + cdef np.npy_intp i, j, n_rows, n_cols + + n_rows = arr.shape[0] + n_cols = arr.shape[1] + + d_exp = np.empty_like(arr) + for i in range(n_rows): + row_total = 0 + for j in range(n_cols): + row_total += arr[i, j] + psi_row_total = psi(row_total) + + for j in range(n_cols): + d_exp[i, j] = psi(arr[i, j]) - psi_row_total + + return d_exp + + +# Psi function for positive arguments. Optimized for speed, not accuracy. +# +# After: J. Bernardo (1976). Algorithm AS 103: Psi (Digamma) Function. +# https://www.uv.es/~bernardo/1976AppStatist.pdf +@cython.cdivision(True) +cdef double psi(double x) nogil: + if x <= 1e-6: + # psi(x) = -EULER - 1/x + O(x) + return -EULER - 1. / x + + cdef double r, result = 0 + + # psi(x + 1) = psi(x) + 1/x + while x < 6: + result -= 1. / x + x += 1 + + # psi(x) = log(x) - 1/(2x) - 1/(12x**2) + 1/(120x**4) - 1/(252x**6) + # + O(1/x**8) + r = 1. / x + result += log(x) - .5 * r + r = r * r + result -= r * ((1./12.) - r * ((1./120.) - r * (1./252.))) + return result; diff --git a/sklearn/manifold/__init__.py b/sklearn/manifold/__init__.py index 12ee523f548d2..fdb26a2ca330e 100644 --- a/sklearn/manifold/__init__.py +++ b/sklearn/manifold/__init__.py @@ -2,11 +2,11 @@ The :mod:`sklearn.manifold` module implements data embedding techniques. 
""" -from .locally_linear import locally_linear_embedding, LocallyLinearEmbedding -from .isomap import Isomap -from .mds import MDS, smacof -from .spectral_embedding_ import SpectralEmbedding, spectral_embedding -from .t_sne import TSNE +from ._locally_linear import locally_linear_embedding, LocallyLinearEmbedding +from ._isomap import Isomap +from ._mds import MDS, smacof +from ._spectral_embedding_ import SpectralEmbedding, spectral_embedding +from ._t_sne import TSNE __all__ = ['locally_linear_embedding', 'LocallyLinearEmbedding', 'Isomap', 'MDS', 'smacof', 'SpectralEmbedding', 'spectral_embedding', "TSNE"] diff --git a/sklearn/manifold/isomap.py b/sklearn/manifold/_isomap.py similarity index 100% rename from sklearn/manifold/isomap.py rename to sklearn/manifold/_isomap.py diff --git a/sklearn/manifold/locally_linear.py b/sklearn/manifold/_locally_linear.py similarity index 100% rename from sklearn/manifold/locally_linear.py rename to sklearn/manifold/_locally_linear.py diff --git a/sklearn/manifold/mds.py b/sklearn/manifold/_mds.py similarity index 100% rename from sklearn/manifold/mds.py rename to sklearn/manifold/_mds.py diff --git a/sklearn/manifold/spectral_embedding_.py b/sklearn/manifold/_spectral_embedding_.py similarity index 100% rename from sklearn/manifold/spectral_embedding_.py rename to sklearn/manifold/_spectral_embedding_.py diff --git a/sklearn/manifold/t_sne.py b/sklearn/manifold/_t_sne.py similarity index 100% rename from sklearn/manifold/t_sne.py rename to sklearn/manifold/_t_sne.py diff --git a/sklearn/manifold/tests/test_locally_linear.py b/sklearn/manifold/tests/test_locally_linear.py index ea1edcd80111d..f01e52810751c 100644 --- a/sklearn/manifold/tests/test_locally_linear.py +++ b/sklearn/manifold/tests/test_locally_linear.py @@ -6,7 +6,7 @@ import pytest from sklearn import neighbors, manifold -from sklearn.manifold.locally_linear import barycenter_kneighbors_graph +from sklearn.manifold._locally_linear import barycenter_kneighbors_graph from sklearn.utils.testing import ignore_warnings from sklearn.utils.testing import assert_raise_message diff --git a/sklearn/manifold/tests/test_spectral_embedding.py b/sklearn/manifold/tests/test_spectral_embedding.py index a1d790c699a16..8461fdf436d7e 100644 --- a/sklearn/manifold/tests/test_spectral_embedding.py +++ b/sklearn/manifold/tests/test_spectral_embedding.py @@ -6,9 +6,9 @@ from scipy.sparse import csgraph from scipy.linalg import eigh -from sklearn.manifold.spectral_embedding_ import SpectralEmbedding -from sklearn.manifold.spectral_embedding_ import _graph_is_connected -from sklearn.manifold.spectral_embedding_ import _graph_connected_component +from sklearn.manifold import SpectralEmbedding +from sklearn.manifold._spectral_embedding_ import _graph_is_connected +from sklearn.manifold._spectral_embedding_ import _graph_connected_component from sklearn.manifold import spectral_embedding from sklearn.metrics.pairwise import rbf_kernel from sklearn.metrics import normalized_mutual_info_score diff --git a/sklearn/manifold/tests/test_t_sne.py b/sklearn/manifold/tests/test_t_sne.py index 8d29f33b6867a..34a14574764d6 100644 --- a/sklearn/manifold/tests/test_t_sne.py +++ b/sklearn/manifold/tests/test_t_sne.py @@ -14,13 +14,13 @@ from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import skip_if_32bit from sklearn.utils import check_random_state -from sklearn.manifold.t_sne import _joint_probabilities -from sklearn.manifold.t_sne import _joint_probabilities_nn -from sklearn.manifold.t_sne 
-from sklearn.manifold.t_sne import _kl_divergence_bh
-from sklearn.manifold.t_sne import _gradient_descent
-from sklearn.manifold.t_sne import trustworthiness
-from sklearn.manifold.t_sne import TSNE
+from sklearn.manifold._t_sne import _joint_probabilities
+from sklearn.manifold._t_sne import _joint_probabilities_nn
+from sklearn.manifold._t_sne import _kl_divergence
+from sklearn.manifold._t_sne import _kl_divergence_bh
+from sklearn.manifold._t_sne import _gradient_descent
+from sklearn.manifold._t_sne import trustworthiness
+from sklearn.manifold import TSNE
 from sklearn.manifold import _barnes_hut_tsne
 from sklearn.manifold._utils import _binary_search_perplexity
 from sklearn.datasets import make_blobs

From 847a1244246e62f8f762317ffa65b6b04f0e7971 Mon Sep 17 00:00:00 2001
From: Thomas J Fan
Date: Mon, 21 Oct 2019 11:40:21 -0400
Subject: [PATCH 2/3] MNT Adds manifold to gitignore

---
 .gitignore | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/.gitignore b/.gitignore
index 22a73ab0c13a6..7b0965f8b8130 100644
--- a/.gitignore
+++ b/.gitignore
@@ -129,3 +129,9 @@ sklearn/svm/bounds.py
 sklearn/svm/libsvm.py
 sklearn/svm/libsvm_sparse.py
 sklearn/svm/liblinear.py
+
+sklearn/manifold/isomap.py
+sklearn/manifold/locally_linear.py
+sklearn/manifold/mds.py
+sklearn/manifold/spectral_embedding_.py
+sklearn/manifold/t_sne.py
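The old filenames are gitignored because they stop being tracked sources: the
build step materializes them as thin deprecation shims from the table in
sklearn/_build_utils/deprecated_modules.py. Purely as an illustration (the
real generated contents are whatever that build utility emits, and may
differ), such a shim could look like:

    # sklearn/manifold/t_sne.py -- hypothetical generated shim, for
    # illustration only; not the actual output of deprecated_modules.py.
    import warnings

    from ._t_sne import *  # noqa: F401,F403  re-export the implementation

    warnings.warn(
        "The sklearn.manifold.t_sne module is deprecated and will be "
        "removed in a future release. Import from sklearn.manifold instead.",
        FutureWarning,
    )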
From 3c4c05e646dc98e5c2d8a2016b9fbb2b0d1bc4c0 Mon Sep 17 00:00:00 2001
From: Thomas J Fan
Date: Mon, 21 Oct 2019 12:55:42 -0400
Subject: [PATCH 3/3] REV

---
 sklearn/decomposition/_base.py             | 159 ---------------------
 sklearn/decomposition/_online_lda_fast.pyx | 113 ---------------
 2 files changed, 272 deletions(-)
 delete mode 100644 sklearn/decomposition/_base.py
 delete mode 100644 sklearn/decomposition/_online_lda_fast.pyx

diff --git a/sklearn/decomposition/_base.py b/sklearn/decomposition/_base.py
deleted file mode 100644
index e89a05051404b..0000000000000
--- a/sklearn/decomposition/_base.py
+++ /dev/null
@@ -1,159 +0,0 @@
-"""Principal Component Analysis Base Classes"""
-
-# Author: Alexandre Gramfort
-#         Olivier Grisel
-#         Mathieu Blondel
-#         Denis A. Engemann
-#         Kyle Kastner
-#
-# License: BSD 3 clause
-
-import numpy as np
-from scipy import linalg
-
-from ..base import BaseEstimator, TransformerMixin
-from ..utils import check_array
-from ..utils.validation import check_is_fitted
-from abc import ABCMeta, abstractmethod
-
-
-class _BasePCA(TransformerMixin, BaseEstimator, metaclass=ABCMeta):
-    """Base class for PCA methods.
-
-    Warning: This class should not be used directly.
-    Use derived classes instead.
-    """
-    def get_covariance(self):
-        """Compute data covariance with the generative model.
-
-        ``cov = components_.T * S**2 * components_ + sigma2 * eye(n_features)``
-        where S**2 contains the explained variances, and sigma2 contains the
-        noise variances.
-
-        Returns
-        -------
-        cov : array, shape=(n_features, n_features)
-            Estimated covariance of data.
-        """
-        components_ = self.components_
-        exp_var = self.explained_variance_
-        if self.whiten:
-            components_ = components_ * np.sqrt(exp_var[:, np.newaxis])
-        exp_var_diff = np.maximum(exp_var - self.noise_variance_, 0.)
-        cov = np.dot(components_.T * exp_var_diff, components_)
-        cov.flat[::len(cov) + 1] += self.noise_variance_  # modify diag inplace
-        return cov
-
-    def get_precision(self):
-        """Compute data precision matrix with the generative model.
-
-        Equals the inverse of the covariance but computed with
-        the matrix inversion lemma for efficiency.
-
-        Returns
-        -------
-        precision : array, shape=(n_features, n_features)
-            Estimated precision of data.
-        """
-        n_features = self.components_.shape[1]
-
-        # handle corner cases first
-        if self.n_components_ == 0:
-            return np.eye(n_features) / self.noise_variance_
-        if self.n_components_ == n_features:
-            return linalg.inv(self.get_covariance())
-
-        # Get precision using matrix inversion lemma
-        components_ = self.components_
-        exp_var = self.explained_variance_
-        if self.whiten:
-            components_ = components_ * np.sqrt(exp_var[:, np.newaxis])
-        exp_var_diff = np.maximum(exp_var - self.noise_variance_, 0.)
-        precision = np.dot(components_, components_.T) / self.noise_variance_
-        precision.flat[::len(precision) + 1] += 1. / exp_var_diff
-        precision = np.dot(components_.T,
-                           np.dot(linalg.inv(precision), components_))
-        precision /= -(self.noise_variance_ ** 2)
-        precision.flat[::len(precision) + 1] += 1. / self.noise_variance_
-        return precision
-
-    @abstractmethod
-    def fit(self, X, y=None):
-        """Placeholder for fit. Subclasses should implement this method!
-
-        Fit the model with X.
-
-        Parameters
-        ----------
-        X : array-like, shape (n_samples, n_features)
-            Training data, where n_samples is the number of samples and
-            n_features is the number of features.
-
-        Returns
-        -------
-        self : object
-            Returns the instance itself.
-        """
-
-    def transform(self, X):
-        """Apply dimensionality reduction to X.
-
-        X is projected on the first principal components previously extracted
-        from a training set.
-
-        Parameters
-        ----------
-        X : array-like, shape (n_samples, n_features)
-            New data, where n_samples is the number of samples
-            and n_features is the number of features.
-
-        Returns
-        -------
-        X_new : array-like, shape (n_samples, n_components)
-
-        Examples
-        --------
-
-        >>> import numpy as np
-        >>> from sklearn.decomposition import IncrementalPCA
-        >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
-        >>> ipca = IncrementalPCA(n_components=2, batch_size=3)
-        >>> ipca.fit(X)
-        IncrementalPCA(batch_size=3, n_components=2)
-        >>> ipca.transform(X)  # doctest: +SKIP
-        """
-        check_is_fitted(self)
-
-        X = check_array(X)
-        if self.mean_ is not None:
-            X = X - self.mean_
-        X_transformed = np.dot(X, self.components_.T)
-        if self.whiten:
-            X_transformed /= np.sqrt(self.explained_variance_)
-        return X_transformed
-
-    def inverse_transform(self, X):
-        """Transform data back to its original space.
-
-        In other words, return an input X_original whose transform would be X.
-
-        Parameters
-        ----------
-        X : array-like, shape (n_samples, n_components)
-            New data, where n_samples is the number of samples
-            and n_components is the number of components.
-
-        Returns
-        -------
-        X_original : array-like, shape (n_samples, n_features)
-
-        Notes
-        -----
-        If whitening is enabled, inverse_transform will compute the
-        exact inverse operation, which includes reversing whitening.
-        """
-        if self.whiten:
-            return np.dot(X, np.sqrt(self.explained_variance_[:, np.newaxis]) *
-                          self.components_) + self.mean_
-        else:
-            return np.dot(X, self.components_) + self.mean_
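An aside on the reverted file: get_precision leans on the Woodbury matrix
identity, inverting a k x k system instead of the full p x p covariance. A
standalone NumPy check of that identity, with hypothetical dimensions chosen
for the example:

    import numpy as np
    from scipy import linalg

    rng = np.random.RandomState(0)
    n_features, n_components = 5, 2
    sigma2 = 0.5                      # noise variance
    evar = np.array([3.0, 2.0])       # explained variance above the noise

    # W: components with orthonormal rows, as components_ would be.
    W = linalg.qr(rng.randn(n_features, n_components))[0][:, :n_components].T
    cov = W.T @ np.diag(evar) @ W + sigma2 * np.eye(n_features)

    # Woodbury: invert a 2x2 matrix instead of the 5x5 covariance.
    small = W @ W.T / sigma2 + np.diag(1.0 / evar)
    precision = (np.eye(n_features) / sigma2
                 - W.T @ linalg.inv(small) @ W / sigma2 ** 2)

    assert np.allclose(precision, linalg.inv(cov))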
- """ - if self.whiten: - return np.dot(X, np.sqrt(self.explained_variance_[:, np.newaxis]) * - self.components_) + self.mean_ - else: - return np.dot(X, self.components_) + self.mean_ diff --git a/sklearn/decomposition/_online_lda_fast.pyx b/sklearn/decomposition/_online_lda_fast.pyx deleted file mode 100644 index 1c00af02d2375..0000000000000 --- a/sklearn/decomposition/_online_lda_fast.pyx +++ /dev/null @@ -1,113 +0,0 @@ -# -# cython: boundscheck=False, wraparound=False - -cimport cython -cimport numpy as np -import numpy as np - -np.import_array() - -from libc.math cimport exp, fabs, log -from numpy.math cimport EULER - - -def mean_change(np.ndarray[ndim=1, dtype=np.float64_t] arr_1, - np.ndarray[ndim=1, dtype=np.float64_t] arr_2): - """Calculate the mean difference between two arrays. - - Equivalent to np.abs(arr_1 - arr2).mean(). - """ - - cdef np.float64_t total, diff - cdef np.npy_intp i, size - - size = arr_1.shape[0] - total = 0.0 - for i in range(size): - diff = fabs(arr_1[i] - arr_2[i]) - total += diff - - return total / size - - -def _dirichlet_expectation_1d(np.ndarray[ndim=1, dtype=np.float64_t] doc_topic, - double doc_topic_prior, - np.ndarray[ndim=1, dtype=np.float64_t] out): - """Dirichlet expectation for a single sample: - exp(E[log(theta)]) for theta ~ Dir(doc_topic) - after adding doc_topic_prior to doc_topic, in-place. - - Equivalent to - doc_topic += doc_topic_prior - out[:] = np.exp(psi(doc_topic) - psi(np.sum(doc_topic))) - """ - - cdef np.float64_t dt, psi_total, total - cdef np.npy_intp i, size - - size = doc_topic.shape[0] - - total = 0.0 - for i in range(size): - dt = doc_topic[i] + doc_topic_prior - doc_topic[i] = dt - total += dt - psi_total = psi(total) - - for i in range(size): - out[i] = exp(psi(doc_topic[i]) - psi_total) - - -def _dirichlet_expectation_2d(np.ndarray[ndim=2, dtype=np.float64_t] arr): - """Dirichlet expectation for multiple samples: - E[log(theta)] for theta ~ Dir(arr). - - Equivalent to psi(arr) - psi(np.sum(arr, axis=1))[:, np.newaxis]. - - Note that unlike _dirichlet_expectation_1d, this function doesn't compute - the exp and doesn't add in the prior. - """ - cdef np.float64_t row_total, psi_row_total - cdef np.ndarray[ndim=2, dtype=np.float64_t] d_exp - cdef np.npy_intp i, j, n_rows, n_cols - - n_rows = arr.shape[0] - n_cols = arr.shape[1] - - d_exp = np.empty_like(arr) - for i in range(n_rows): - row_total = 0 - for j in range(n_cols): - row_total += arr[i, j] - psi_row_total = psi(row_total) - - for j in range(n_cols): - d_exp[i, j] = psi(arr[i, j]) - psi_row_total - - return d_exp - - -# Psi function for positive arguments. Optimized for speed, not accuracy. -# -# After: J. Bernardo (1976). Algorithm AS 103: Psi (Digamma) Function. -# https://www.uv.es/~bernardo/1976AppStatist.pdf -@cython.cdivision(True) -cdef double psi(double x) nogil: - if x <= 1e-6: - # psi(x) = -EULER - 1/x + O(x) - return -EULER - 1. / x - - cdef double r, result = 0 - - # psi(x + 1) = psi(x) + 1/x - while x < 6: - result -= 1. / x - x += 1 - - # psi(x) = log(x) - 1/(2x) - 1/(12x**2) + 1/(120x**4) - 1/(252x**6) - # + O(1/x**8) - r = 1. / x - result += log(x) - .5 * r - r = r * r - result -= r * ((1./12.) - r * ((1./120.) - r * (1./252.))) - return result;