diff --git a/sklearn/decomposition/_lda.py b/sklearn/decomposition/_lda.py index 8ce3f05b7031c..1458891ce1c8c 100644 --- a/sklearn/decomposition/_lda.py +++ b/sklearn/decomposition/_lda.py @@ -13,12 +13,11 @@ import numpy as np import scipy.sparse as sp -from scipy.special import gammaln +from scipy.special import gammaln, logsumexp from joblib import Parallel, delayed, effective_n_jobs from ..base import BaseEstimator, TransformerMixin from ..utils import check_random_state, gen_batches, gen_even_slices -from ..utils.fixes import logsumexp from ..utils.validation import check_non_negative from ..utils.validation import check_is_fitted diff --git a/sklearn/ensemble/_gb_losses.py b/sklearn/ensemble/_gb_losses.py index f400144abc4fc..7bd5faca1d7d9 100644 --- a/sklearn/ensemble/_gb_losses.py +++ b/sklearn/ensemble/_gb_losses.py @@ -6,10 +6,9 @@ from abc import abstractmethod import numpy as np -from scipy.special import expit +from scipy.special import expit, logsumexp from ..tree._tree import TREE_LEAF -from ..utils.fixes import logsumexp from ..utils.stats import _weighted_percentile from ..dummy import DummyClassifier from ..dummy import DummyRegressor diff --git a/sklearn/ensemble/_hist_gradient_boosting/loss.py b/sklearn/ensemble/_hist_gradient_boosting/loss.py index 2dbf8bd58773e..c7884a25a9c41 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/loss.py +++ b/sklearn/ensemble/_hist_gradient_boosting/loss.py @@ -9,11 +9,7 @@ from abc import ABC, abstractmethod import numpy as np -from scipy.special import expit -try: # logsumexp was moved from mist to special in 0.19 - from scipy.special import logsumexp -except ImportError: - from scipy.misc import logsumexp +from scipy.special import expit, logsumexp from .common import Y_DTYPE from .common import G_H_DTYPE diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py index 9164751bac256..0b504ee43a7ba 100644 --- a/sklearn/ensemble/tests/test_forest.py +++ b/sklearn/ensemble/tests/test_forest.py @@ -20,6 +20,7 @@ from scipy.sparse import csr_matrix from scipy.sparse import csc_matrix from scipy.sparse import coo_matrix +from scipy.special import comb import pytest @@ -47,7 +48,6 @@ from sklearn.model_selection import GridSearchCV from sklearn.svm import LinearSVC from sklearn.utils.validation import check_random_state -from sklearn.utils.fixes import comb from sklearn.tree._classes import SPARSE_SPLITTERS diff --git a/sklearn/impute/_iterative.py b/sklearn/impute/_iterative.py index 58a35d157c7a4..88eff8503d510 100644 --- a/sklearn/impute/_iterative.py +++ b/sklearn/impute/_iterative.py @@ -1,10 +1,8 @@ from time import time -from distutils.version import LooseVersion from collections import namedtuple import warnings -import scipy from scipy import stats import numpy as np @@ -329,19 +327,10 @@ def _impute_one_feature(self, a = (self._min_value[feat_idx] - mus) / sigmas b = (self._max_value[feat_idx] - mus) / sigmas - if scipy.__version__ < LooseVersion('0.18'): - # bug with vector-valued `a` in old scipy - imputed_values[inrange_mask] = [ - stats.truncnorm(a=a_, b=b_, - loc=loc_, scale=scale_).rvs( - random_state=self.random_state_) - for a_, b_, loc_, scale_ - in zip(a, b, mus, sigmas)] - else: - truncated_normal = stats.truncnorm(a=a, b=b, - loc=mus, scale=sigmas) - imputed_values[inrange_mask] = truncated_normal.rvs( - random_state=self.random_state_) + truncated_normal = stats.truncnorm(a=a, b=b, + loc=mus, scale=sigmas) + imputed_values[inrange_mask] = truncated_normal.rvs( + random_state=self.random_state_) else: imputed_values = estimator.predict(X_test) imputed_values = np.clip(imputed_values, diff --git a/sklearn/linear_model/_logistic.py b/sklearn/linear_model/_logistic.py index 5b3adcb002c5d..8b89458525590 100644 --- a/sklearn/linear_model/_logistic.py +++ b/sklearn/linear_model/_logistic.py @@ -15,7 +15,7 @@ import numpy as np from scipy import optimize, sparse -from scipy.special import expit +from scipy.special import expit, logsumexp from joblib import Parallel, delayed, effective_n_jobs from ._base import LinearClassifierMixin, SparseCoefMixin, BaseEstimator @@ -27,7 +27,6 @@ from ..utils.extmath import (log_logistic, safe_sparse_dot, softmax, squared_norm) from ..utils.extmath import row_norms -from ..utils.fixes import logsumexp from ..utils.optimize import _newton_cg, _check_optimize_result from ..utils.validation import check_is_fitted, _check_sample_weight from ..utils.multiclass import check_classification_targets diff --git a/sklearn/linear_model/tests/test_sag.py b/sklearn/linear_model/tests/test_sag.py index 6bb156c64715b..4a66d6d576add 100644 --- a/sklearn/linear_model/tests/test_sag.py +++ b/sklearn/linear_model/tests/test_sag.py @@ -7,6 +7,7 @@ import pytest import numpy as np import scipy.sparse as sp +from scipy.special import logsumexp from sklearn.linear_model._sag import get_auto_step_size from sklearn.linear_model._sag_fast import _multinomial_grad_loss_all_samples @@ -14,7 +15,6 @@ from sklearn.linear_model._base import make_dataset from sklearn.linear_model._logistic import _multinomial_loss_grad -from sklearn.utils.fixes import logsumexp from sklearn.utils.extmath import row_norms from sklearn.utils._testing import assert_almost_equal from sklearn.utils._testing import assert_array_almost_equal diff --git a/sklearn/metrics/cluster/_supervised.py b/sklearn/metrics/cluster/_supervised.py index dde32dd3f25ce..973c45a908bf1 100644 --- a/sklearn/metrics/cluster/_supervised.py +++ b/sklearn/metrics/cluster/_supervised.py @@ -19,10 +19,11 @@ import numpy as np from scipy import sparse as sp +from scipy.special import comb from ._expected_mutual_info_fast import expected_mutual_information from ...utils.validation import check_array, check_consistent_length -from ...utils.fixes import comb, _astype_copy_false +from ...utils.fixes import _astype_copy_false def _comb2(n): diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 5c09d67f6e63d..ee6c81a149b3b 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -9,6 +9,7 @@ from time import time import numpy as np +from scipy.special import logsumexp from .. import cluster from ..base import BaseEstimator @@ -16,7 +17,6 @@ from ..exceptions import ConvergenceWarning from ..utils import check_array, check_random_state from ..utils.validation import check_is_fitted -from ..utils.fixes import logsumexp def _check_shape(param, param_shape, name): diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index 55e770d701858..bbd8b3f33dfb7 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -21,6 +21,7 @@ import warnings import numpy as np +from numpy.ma import MaskedArray from scipy.stats import rankdata from ..base import BaseEstimator, is_classifier, clone @@ -31,7 +32,6 @@ from ..exceptions import NotFittedError from joblib import Parallel, delayed from ..utils import check_random_state -from ..utils.fixes import MaskedArray from ..utils.random import sample_without_replacement from ..utils.validation import indexable, check_is_fitted, _check_fit_params from ..utils.metaestimators import if_delegate_has_method diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py index 0b769aefe120c..90ffb3989349a 100644 --- a/sklearn/model_selection/_split.py +++ b/sklearn/model_selection/_split.py @@ -18,13 +18,13 @@ from inspect import signature import numpy as np +from scipy.special import comb from ..utils import indexable, check_random_state, _safe_indexing from ..utils import _approximate_mode from ..utils.validation import _num_samples, column_or_1d from ..utils.validation import check_array from ..utils.multiclass import type_of_target -from ..utils.fixes import comb from ..base import _pprint __all__ = ['BaseCrossValidator', diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py index 875e113f8dc36..0205eb8901699 100644 --- a/sklearn/model_selection/tests/test_split.py +++ b/sklearn/model_selection/tests/test_split.py @@ -4,6 +4,7 @@ import numpy as np from scipy.sparse import coo_matrix, csc_matrix, csr_matrix from scipy import stats +from scipy.special import comb from itertools import combinations from itertools import combinations_with_replacement from itertools import permutations @@ -46,8 +47,6 @@ from sklearn.datasets import load_digits from sklearn.datasets import make_classification -from sklearn.utils.fixes import comb - from sklearn.svm import SVC X = np.ones(10) diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py index bcd9da1cb72fc..6ef3895ffdb60 100644 --- a/sklearn/naive_bayes.py +++ b/sklearn/naive_bayes.py @@ -21,6 +21,7 @@ import numpy as np +from scipy.special import logsumexp from .base import BaseEstimator, ClassifierMixin from .preprocessing import binarize @@ -28,7 +29,6 @@ from .preprocessing import label_binarize from .utils import check_X_y, check_array, deprecated from .utils.extmath import safe_sparse_dot -from .utils.fixes import logsumexp from .utils.multiclass import _check_partial_fit_first_call from .utils.validation import check_is_fitted, check_non_negative, column_or_1d from .utils.validation import _check_sample_weight diff --git a/sklearn/preprocessing/_encoders.py b/sklearn/preprocessing/_encoders.py index ab66847854f33..aa3d8d9dabca8 100644 --- a/sklearn/preprocessing/_encoders.py +++ b/sklearn/preprocessing/_encoders.py @@ -7,7 +7,6 @@ from ..base import BaseEstimator, TransformerMixin from ..utils import check_array -from ..utils.fixes import _argmax from ..utils.validation import check_is_fitted from ._label import _encode, _encode_check_unknown @@ -527,7 +526,7 @@ def inverse_transform(self, X): continue sub = X[:, j:j + n_categories] # for sparse X argmax returns 2D matrix, ensure 1D array - labels = np.asarray(_argmax(sub, axis=1)).flatten() + labels = np.asarray(sub.argmax(axis=1)).flatten() X_tr[:, i] = cats[labels] if self.handle_unknown == 'ignore': unknown = np.asarray(sub.sum(axis=1) == 0).flatten() diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py index 03e220eab29ae..e44a2e7af5bcb 100644 --- a/sklearn/utils/fixes.py +++ b/sklearn/utils/fixes.py @@ -34,11 +34,6 @@ def _parse_version(version_string): sp_version = _parse_version(scipy.__version__) -try: # SciPy >= 0.19 - from scipy.special import comb, logsumexp -except ImportError: - from scipy.misc import comb, logsumexp # noqa - if sp_version >= (1, 4): from scipy.sparse.linalg import lobpcg else: @@ -53,125 +48,6 @@ def _parse_version(version_string): else: from scipy.linalg import pinvh # noqa -if sp_version >= (0, 19): - def _argmax(arr_or_spmatrix, axis=None): - return arr_or_spmatrix.argmax(axis=axis) -else: - # Backport of argmax functionality from scipy 0.19.1, can be removed - # once support for scipy 0.18 and below is dropped - - def _find_missing_index(ind, n): - for k, a in enumerate(ind): - if k != a: - return k - - k += 1 - if k < n: - return k - else: - return -1 - - def _arg_min_or_max_axis(self, axis, op, compare): - if self.shape[axis] == 0: - raise ValueError("Can't apply the operation along a zero-sized " - "dimension.") - - if axis < 0: - axis += 2 - - zero = self.dtype.type(0) - - mat = self.tocsc() if axis == 0 else self.tocsr() - mat.sum_duplicates() - - ret_size, line_size = mat._swap(mat.shape) - ret = np.zeros(ret_size, dtype=int) - - nz_lines, = np.nonzero(np.diff(mat.indptr)) - for i in nz_lines: - p, q = mat.indptr[i:i + 2] - data = mat.data[p:q] - indices = mat.indices[p:q] - am = op(data) - m = data[am] - if compare(m, zero) or q - p == line_size: - ret[i] = indices[am] - else: - zero_ind = _find_missing_index(indices, line_size) - if m == zero: - ret[i] = min(am, zero_ind) - else: - ret[i] = zero_ind - - if axis == 1: - ret = ret.reshape(-1, 1) - - return np.asmatrix(ret) - - def _arg_min_or_max(self, axis, out, op, compare): - if out is not None: - raise ValueError("Sparse matrices do not support " - "an 'out' parameter.") - - # validateaxis(axis) - - if axis is None: - if 0 in self.shape: - raise ValueError("Can't apply the operation to " - "an empty matrix.") - - if self.nnz == 0: - return 0 - else: - zero = self.dtype.type(0) - mat = self.tocoo() - mat.sum_duplicates() - am = op(mat.data) - m = mat.data[am] - - if compare(m, zero): - return mat.row[am] * mat.shape[1] + mat.col[am] - else: - size = np.product(mat.shape) - if size == mat.nnz: - return am - else: - ind = mat.row * mat.shape[1] + mat.col - zero_ind = _find_missing_index(ind, size) - if m == zero: - return min(zero_ind, am) - else: - return zero_ind - - return _arg_min_or_max_axis(self, axis, op, compare) - - def _sparse_argmax(self, axis=None, out=None): - return _arg_min_or_max(self, axis, out, np.argmax, np.greater) - - def _argmax(arr_or_matrix, axis=None): - if sp.issparse(arr_or_matrix): - return _sparse_argmax(arr_or_matrix, axis=axis) - else: - return arr_or_matrix.argmax(axis=axis) - - -if np_version < (1, 12): - class MaskedArray(np.ma.MaskedArray): - # Before numpy 1.12, np.ma.MaskedArray object is not picklable - # This fix is needed to make our model_selection.GridSearchCV - # picklable as the ``cv_results_`` param uses MaskedArray - def __getstate__(self): - """Return the internal state of the masked array, for pickling - purposes. - - """ - cf = 'CF'[self.flags.fnc] - data_state = super(np.ma.MaskedArray, self).__reduce__()[2] - return data_state + (np.ma.getmaskarray(self).tostring(cf), - self._fill_value) -else: - from numpy.ma import MaskedArray # noqa - def _object_dtype_isnan(X): return X != X diff --git a/sklearn/utils/tests/test_fixes.py b/sklearn/utils/tests/test_fixes.py index 21ddaf7d3ec5c..a2fa702f19c4a 100644 --- a/sklearn/utils/tests/test_fixes.py +++ b/sklearn/utils/tests/test_fixes.py @@ -4,7 +4,6 @@ # License: BSD 3 clause import math -import pickle import numpy as np import pytest @@ -12,22 +11,11 @@ from sklearn.utils._testing import assert_array_equal -from sklearn.utils.fixes import MaskedArray from sklearn.utils.fixes import _joblib_parallel_args from sklearn.utils.fixes import _object_dtype_isnan from sklearn.utils.fixes import loguniform -def test_masked_array_obj_dtype_pickleable(): - marr = MaskedArray([1, None, 'a'], dtype=object) - - for mask in (True, False, [0, 1, 0]): - marr.mask = mask - marr_pickled = pickle.loads(pickle.dumps(marr)) - assert_array_equal(marr.data, marr_pickled.data) - assert_array_equal(marr.mask, marr_pickled.mask) - - @pytest.mark.parametrize('joblib_version', ('0.11', '0.12.0')) def test_joblib_parallel_args(monkeypatch, joblib_version): import joblib diff --git a/sklearn/utils/tests/test_random.py b/sklearn/utils/tests/test_random.py index 7d2437471aabb..c9ff69ec8d8b8 100644 --- a/sklearn/utils/tests/test_random.py +++ b/sklearn/utils/tests/test_random.py @@ -1,9 +1,9 @@ import numpy as np import pytest import scipy.sparse as sp +from scipy.special import comb from numpy.testing import assert_array_almost_equal -from sklearn.utils.fixes import comb from sklearn.utils.random import _random_choice_csc, sample_without_replacement from sklearn.utils._random import _our_rand_r_py