MAINT Remove outdated numpy and scipy backports by rth · Pull Request #16725 · scikit-learn/scikit-learn

Status: merged 3 commits on Mar 20, 2020.

This pull request removes compatibility shims that the current minimum dependency versions make unnecessary: the scipy.misc fallbacks for comb and logsumexp (both moved to scipy.special in SciPy 0.19), the sparse-matrix argmax backport for SciPy < 0.19, the picklable MaskedArray subclass for NumPy < 1.12, and a truncnorm workaround for SciPy < 0.18. Call sites now import comb and logsumexp directly from scipy.special and MaskedArray from numpy.ma. Changes from all commits are shown below.
3 changes: 1 addition & 2 deletions sklearn/decomposition/_lda.py
@@ -13,12 +13,11 @@

 import numpy as np
 import scipy.sparse as sp
-from scipy.special import gammaln
+from scipy.special import gammaln, logsumexp
 from joblib import Parallel, delayed, effective_n_jobs

 from ..base import BaseEstimator, TransformerMixin
 from ..utils import check_random_state, gen_batches, gen_even_slices
-from ..utils.fixes import logsumexp
 from ..utils.validation import check_non_negative
 from ..utils.validation import check_is_fitted
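Most of the changes below repeat this one-line swap: logsumexp used to be re-exported from sklearn.utils.fixes to paper over its move from scipy.misc to scipy.special in SciPy 0.19, and with the supported minimum now at or above 0.19 it can be imported from scipy.special directly. A minimal sketch of why the function exists at all (illustrative values, not part of this PR):

    import numpy as np
    from scipy.special import logsumexp

    x = np.array([1000.0, 1000.0])
    # np.log(np.sum(np.exp(x))) overflows to inf, but logsumexp
    # shifts by max(x) first and returns 1000 + log(2)
    print(logsumexp(x))  # ~1000.6931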
3 changes: 1 addition & 2 deletions sklearn/ensemble/_gb_losses.py
@@ -6,10 +6,9 @@
 from abc import abstractmethod

 import numpy as np
-from scipy.special import expit
+from scipy.special import expit, logsumexp

 from ..tree._tree import TREE_LEAF
-from ..utils.fixes import logsumexp
 from ..utils.stats import _weighted_percentile
 from ..dummy import DummyClassifier
 from ..dummy import DummyRegressor
6 changes: 1 addition & 5 deletions sklearn/ensemble/_hist_gradient_boosting/loss.py
@@ -9,11 +9,7 @@
 from abc import ABC, abstractmethod

 import numpy as np
-from scipy.special import expit
-try:  # logsumexp was moved from misc to special in 0.19
-    from scipy.special import logsumexp
-except ImportError:
-    from scipy.misc import logsumexp
+from scipy.special import expit, logsumexp

 from .common import Y_DTYPE
 from .common import G_H_DTYPE
2 changes: 1 addition & 1 deletion sklearn/ensemble/tests/test_forest.py
@@ -20,6 +20,7 @@
 from scipy.sparse import csr_matrix
 from scipy.sparse import csc_matrix
 from scipy.sparse import coo_matrix
+from scipy.special import comb

 import pytest

@@ -47,7 +48,6 @@
 from sklearn.model_selection import GridSearchCV
 from sklearn.svm import LinearSVC
 from sklearn.utils.validation import check_random_state
-from sklearn.utils.fixes import comb

 from sklearn.tree._classes import SPARSE_SPLITTERS
19 changes: 4 additions & 15 deletions sklearn/impute/_iterative.py
@@ -1,10 +1,8 @@

 from time import time
-from distutils.version import LooseVersion
 from collections import namedtuple
 import warnings

-import scipy
 from scipy import stats
 import numpy as np

@@ -329,19 +327,10 @@ def _impute_one_feature(self,
             a = (self._min_value[feat_idx] - mus) / sigmas
             b = (self._max_value[feat_idx] - mus) / sigmas

-            if scipy.__version__ < LooseVersion('0.18'):
-                # bug with vector-valued `a` in old scipy
-                imputed_values[inrange_mask] = [
-                    stats.truncnorm(a=a_, b=b_,
-                                    loc=loc_, scale=scale_).rvs(
-                        random_state=self.random_state_)
-                    for a_, b_, loc_, scale_
-                    in zip(a, b, mus, sigmas)]
-            else:
-                truncated_normal = stats.truncnorm(a=a, b=b,
-                                                   loc=mus, scale=sigmas)
-                imputed_values[inrange_mask] = truncated_normal.rvs(
-                    random_state=self.random_state_)
+            truncated_normal = stats.truncnorm(a=a, b=b,
+                                               loc=mus, scale=sigmas)
+            imputed_values[inrange_mask] = truncated_normal.rvs(
+                random_state=self.random_state_)
         else:
             imputed_values = estimator.predict(X_test)
             imputed_values = np.clip(imputed_values,
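The deleted branch worked around a bug in SciPy < 0.18 where stats.truncnorm could not take vector-valued bounds, forcing a Python-level loop. A small sketch of the vectorized call that remains, with made-up values (the names mirror the code above, but nothing here comes from the PR):

    import numpy as np
    from scipy import stats

    rng = np.random.RandomState(0)
    mus = np.array([0.0, 5.0, -2.0])
    sigmas = np.array([1.0, 2.0, 0.5])
    # truncnorm expects bounds in standard-normal units, as computed above
    a = (np.array([-1.0, 0.0, -3.0]) - mus) / sigmas
    b = (np.array([1.0, 10.0, 0.0]) - mus) / sigmas

    # one call draws all three truncated-normal samples, no loop needed
    truncated_normal = stats.truncnorm(a=a, b=b, loc=mus, scale=sigmas)
    print(truncated_normal.rvs(random_state=rng))  # array of shape (3,)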
3 changes: 1 addition & 2 deletions sklearn/linear_model/_logistic.py
@@ -15,7 +15,7 @@

 import numpy as np
 from scipy import optimize, sparse
-from scipy.special import expit
+from scipy.special import expit, logsumexp
 from joblib import Parallel, delayed, effective_n_jobs

 from ._base import LinearClassifierMixin, SparseCoefMixin, BaseEstimator

@@ -27,7 +27,6 @@
 from ..utils.extmath import (log_logistic, safe_sparse_dot, softmax,
                              squared_norm)
 from ..utils.extmath import row_norms
-from ..utils.fixes import logsumexp
 from ..utils.optimize import _newton_cg, _check_optimize_result
 from ..utils.validation import check_is_fitted, _check_sample_weight
 from ..utils.multiclass import check_classification_targets
2 changes: 1 addition & 1 deletion sklearn/linear_model/tests/test_sag.py
@@ -7,14 +7,14 @@
 import pytest
 import numpy as np
 import scipy.sparse as sp
+from scipy.special import logsumexp

 from sklearn.linear_model._sag import get_auto_step_size
 from sklearn.linear_model._sag_fast import _multinomial_grad_loss_all_samples
 from sklearn.linear_model import LogisticRegression, Ridge
 from sklearn.linear_model._base import make_dataset
 from sklearn.linear_model._logistic import _multinomial_loss_grad

-from sklearn.utils.fixes import logsumexp
 from sklearn.utils.extmath import row_norms
 from sklearn.utils._testing import assert_almost_equal
 from sklearn.utils._testing import assert_array_almost_equal
3 changes: 2 additions & 1 deletion sklearn/metrics/cluster/_supervised.py
@@ -19,10 +19,11 @@

 import numpy as np
 from scipy import sparse as sp
+from scipy.special import comb

 from ._expected_mutual_info_fast import expected_mutual_information
 from ...utils.validation import check_array, check_consistent_length
-from ...utils.fixes import comb, _astype_copy_false
+from ...utils.fixes import _astype_copy_false


 def _comb2(n):
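comb follows the same story as logsumexp: it lived in scipy.misc before SciPy 0.19 and in scipy.special afterwards, so the fixes re-export is dead weight. Judging by its name, the _comb2 helper above wraps the "n choose 2" pair count; a quick illustration of the underlying function (not from the PR):

    from scipy.special import comb

    print(comb(5, 2))              # 10.0, floating point by default
    print(comb(5, 2, exact=True))  # 10, exact integer arithmetic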
2 changes: 1 addition & 1 deletion sklearn/mixture/_base.py
@@ -9,14 +9,14 @@
 from time import time

 import numpy as np
+from scipy.special import logsumexp

 from .. import cluster
 from ..base import BaseEstimator
 from ..base import DensityMixin
 from ..exceptions import ConvergenceWarning
 from ..utils import check_array, check_random_state
 from ..utils.validation import check_is_fitted
-from ..utils.fixes import logsumexp


 def _check_shape(param, param_shape, name):
2 changes: 1 addition & 1 deletion sklearn/model_selection/_search.py
@@ -21,6 +21,7 @@
 import warnings

 import numpy as np
+from numpy.ma import MaskedArray
 from scipy.stats import rankdata

 from ..base import BaseEstimator, is_classifier, clone

@@ -31,7 +32,6 @@
 from ..exceptions import NotFittedError
 from joblib import Parallel, delayed
 from ..utils import check_random_state
-from ..utils.fixes import MaskedArray
 from ..utils.random import sample_without_replacement
 from ..utils.validation import indexable, check_is_fitted, _check_fit_params
 from ..utils.metaestimators import if_delegate_has_method
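MaskedArray now comes straight from numpy.ma because the custom subclass in fixes.py (removed below) existed only to make masked arrays picklable on NumPy < 1.12; GridSearchCV needs that because the parameter columns of cv_results_ are masked arrays. A quick check of the property being relied on (illustrative, not part of the PR):

    import pickle

    from numpy.ma import MaskedArray

    arr = MaskedArray(data=[1, 2, 3], mask=[False, True, False])
    restored = pickle.loads(pickle.dumps(arr))
    # on NumPy >= 1.12 the mask survives the round-trip unaided
    assert (restored.mask == arr.mask).all()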
2 changes: 1 addition & 1 deletion sklearn/model_selection/_split.py
@@ -18,13 +18,13 @@
 from inspect import signature

 import numpy as np
+from scipy.special import comb

 from ..utils import indexable, check_random_state, _safe_indexing
 from ..utils import _approximate_mode
 from ..utils.validation import _num_samples, column_or_1d
 from ..utils.validation import check_array
 from ..utils.multiclass import type_of_target
-from ..utils.fixes import comb
 from ..base import _pprint

 __all__ = ['BaseCrossValidator',
3 changes: 1 addition & 2 deletions sklearn/model_selection/tests/test_split.py
@@ -4,6 +4,7 @@
 import numpy as np
 from scipy.sparse import coo_matrix, csc_matrix, csr_matrix
 from scipy import stats
+from scipy.special import comb
 from itertools import combinations
 from itertools import combinations_with_replacement
 from itertools import permutations

@@ -46,8 +47,6 @@
 from sklearn.datasets import load_digits
 from sklearn.datasets import make_classification

-from sklearn.utils.fixes import comb
-
 from sklearn.svm import SVC

 X = np.ones(10)
2 changes: 1 addition & 1 deletion sklearn/naive_bayes.py
@@ -21,14 +21,14 @@


 import numpy as np
+from scipy.special import logsumexp

 from .base import BaseEstimator, ClassifierMixin
 from .preprocessing import binarize
 from .preprocessing import LabelBinarizer
 from .preprocessing import label_binarize
 from .utils import check_X_y, check_array, deprecated
 from .utils.extmath import safe_sparse_dot
-from .utils.fixes import logsumexp
 from .utils.multiclass import _check_partial_fit_first_call
 from .utils.validation import check_is_fitted, check_non_negative, column_or_1d
 from .utils.validation import _check_sample_weight
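In naive_bayes.py the import feeds the posterior normalization: subtracting a per-row logsumexp from the joint log-likelihood yields log-probabilities whose rows exponentiate to one. A generic sketch of that pattern (synthetic numbers, not the library's code):

    import numpy as np
    from scipy.special import logsumexp

    joint_log_likelihood = np.array([[-3.0, -1.0],
                                     [-0.5, -4.0]])
    log_prob = joint_log_likelihood - logsumexp(joint_log_likelihood,
                                                axis=1, keepdims=True)
    print(np.exp(log_prob).sum(axis=1))  # [1. 1.]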
3 changes: 1 addition & 2 deletions sklearn/preprocessing/_encoders.py
@@ -7,7 +7,6 @@

 from ..base import BaseEstimator, TransformerMixin
 from ..utils import check_array
-from ..utils.fixes import _argmax
 from ..utils.validation import check_is_fitted

 from ._label import _encode, _encode_check_unknown

@@ -527,7 +526,7 @@ def inverse_transform(self, X):
                 continue
             sub = X[:, j:j + n_categories]
             # for sparse X argmax returns 2D matrix, ensure 1D array
-            labels = np.asarray(_argmax(sub, axis=1)).flatten()
+            labels = np.asarray(sub.argmax(axis=1)).flatten()
             X_tr[:, i] = cats[labels]
             if self.handle_unknown == 'ignore':
                 unknown = np.asarray(sub.sum(axis=1) == 0).flatten()
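This is the one call site that changes more than an import: _argmax existed only because sparse matrices had no native .argmax before SciPy 0.19. As the surviving comment notes, along an axis the sparse argmax returns a 2D matrix, hence the asarray(...).flatten() to get a 1D label array. A small illustration with made-up data (not from the PR):

    import numpy as np
    import scipy.sparse as sp

    sub = sp.csr_matrix([[0, 1, 0],
                         [0, 0, 2]])
    labels = sub.argmax(axis=1)          # 2D np.matrix of shape (2, 1)
    flat = np.asarray(labels).flatten()  # plain 1D array: [1 2]
    print(flat)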
124 changes: 0 additions & 124 deletions sklearn/utils/fixes.py
@@ -34,11 +34,6 @@ def _parse_version(version_string):
 sp_version = _parse_version(scipy.__version__)


-try:  # SciPy >= 0.19
-    from scipy.special import comb, logsumexp
-except ImportError:
-    from scipy.misc import comb, logsumexp  # noqa
-
 if sp_version >= (1, 4):
     from scipy.sparse.linalg import lobpcg
 else:

@@ -53,125 +48,6 @@ def _parse_version(version_string):
 else:
     from scipy.linalg import pinvh  # noqa

-if sp_version >= (0, 19):
-    def _argmax(arr_or_spmatrix, axis=None):
-        return arr_or_spmatrix.argmax(axis=axis)
-else:
-    # Backport of argmax functionality from scipy 0.19.1, can be removed
-    # once support for scipy 0.18 and below is dropped
-
-    def _find_missing_index(ind, n):
-        for k, a in enumerate(ind):
-            if k != a:
-                return k
-
-        k += 1
-        if k < n:
-            return k
-        else:
-            return -1
-
-    def _arg_min_or_max_axis(self, axis, op, compare):
-        if self.shape[axis] == 0:
-            raise ValueError("Can't apply the operation along a zero-sized "
-                             "dimension.")
-
-        if axis < 0:
-            axis += 2
-
-        zero = self.dtype.type(0)
-
-        mat = self.tocsc() if axis == 0 else self.tocsr()
-        mat.sum_duplicates()
-
-        ret_size, line_size = mat._swap(mat.shape)
-        ret = np.zeros(ret_size, dtype=int)
-
-        nz_lines, = np.nonzero(np.diff(mat.indptr))
-        for i in nz_lines:
-            p, q = mat.indptr[i:i + 2]
-            data = mat.data[p:q]
-            indices = mat.indices[p:q]
-            am = op(data)
-            m = data[am]
-            if compare(m, zero) or q - p == line_size:
-                ret[i] = indices[am]
-            else:
-                zero_ind = _find_missing_index(indices, line_size)
-                if m == zero:
-                    ret[i] = min(am, zero_ind)
-                else:
-                    ret[i] = zero_ind
-
-        if axis == 1:
-            ret = ret.reshape(-1, 1)
-
-        return np.asmatrix(ret)
-
-    def _arg_min_or_max(self, axis, out, op, compare):
-        if out is not None:
-            raise ValueError("Sparse matrices do not support "
-                             "an 'out' parameter.")
-
-        # validateaxis(axis)
-
-        if axis is None:
-            if 0 in self.shape:
-                raise ValueError("Can't apply the operation to "
-                                 "an empty matrix.")
-
-            if self.nnz == 0:
-                return 0
-            else:
-                zero = self.dtype.type(0)
-                mat = self.tocoo()
-                mat.sum_duplicates()
-                am = op(mat.data)
-                m = mat.data[am]
-
-                if compare(m, zero):
-                    return mat.row[am] * mat.shape[1] + mat.col[am]
-                else:
-                    size = np.product(mat.shape)
-                    if size == mat.nnz:
-                        return am
-                    else:
-                        ind = mat.row * mat.shape[1] + mat.col
-                        zero_ind = _find_missing_index(ind, size)
-                        if m == zero:
-                            return min(zero_ind, am)
-                        else:
-                            return zero_ind
-
-        return _arg_min_or_max_axis(self, axis, op, compare)
-
-    def _sparse_argmax(self, axis=None, out=None):
-        return _arg_min_or_max(self, axis, out, np.argmax, np.greater)
-
-    def _argmax(arr_or_matrix, axis=None):
-        if sp.issparse(arr_or_matrix):
-            return _sparse_argmax(arr_or_matrix, axis=axis)
-        else:
-            return arr_or_matrix.argmax(axis=axis)
-
-
-if np_version < (1, 12):
-    class MaskedArray(np.ma.MaskedArray):
-        # Before numpy 1.12, np.ma.MaskedArray object is not picklable
-        # This fix is needed to make our model_selection.GridSearchCV
-        # picklable as the ``cv_results_`` param uses MaskedArray
-        def __getstate__(self):
-            """Return the internal state of the masked array, for pickling
-            purposes.
-
-            """
-            cf = 'CF'[self.flags.fnc]
-            data_state = super(np.ma.MaskedArray, self).__reduce__()[2]
-            return data_state + (np.ma.getmaskarray(self).tostring(cf),
-                                 self._fill_value)
-else:
-    from numpy.ma import MaskedArray  # noqa


 def _object_dtype_isnan(X):
     return X != X
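A side note while reading this file: the surviving _object_dtype_isnan helper leans on NaN being the only value that compares unequal to itself, which keeps working for object-dtype arrays where np.isnan would raise a TypeError. A tiny demonstration (not from the PR):

    import numpy as np

    X = np.array(["a", float("nan"), 3], dtype=object)
    print(X != X)  # [False  True False]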