diff --git a/.circleci/config.yml b/.circleci/config.yml
index b407e8b15dd38..f4ee4e4cf1dfb 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -3,12 +3,12 @@ version: 2
 jobs:
   doc-min-dependencies:
     docker:
-      - image: circleci/python:3.7.3-stretch
+      - image: circleci/python:3.7.7-buster
     environment:
       - OMP_NUM_THREADS: 2
       - MKL_NUM_THREADS: 2
       - CONDA_ENV_NAME: testenv
-      - PYTHON_VERSION: 3.6
+      - PYTHON_VERSION: 3.7
       - NUMPY_VERSION: 'min'
       - SCIPY_VERSION: 'min'
       - MATPLOTLIB_VERSION: 'min'
@@ -47,7 +47,7 @@ jobs:
 
   doc:
     docker:
-      - image: circleci/python:3.7.3-stretch
+      - image: circleci/python:3.7.7-buster
     environment:
       - OMP_NUM_THREADS: 2
       - MKL_NUM_THREADS: 2
@@ -96,7 +96,7 @@ jobs:
 
   lint:
     docker:
-      - image: circleci/python:3.6
+      - image: circleci/python:3.7
     steps:
       - checkout
      - run: ./build_tools/circle/checkout_merge_commit.sh
@@ -130,7 +130,7 @@ jobs:
 
   deploy:
     docker:
-      - image: circleci/python:3.6
+      - image: circleci/python:3.7
     steps:
      - checkout
      - run: ./build_tools/circle/checkout_merge_commit.sh
diff --git a/.travis.yml b/.travis.yml
index 1e6ed78d28ac2..09f05b57eecfa 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -40,19 +40,11 @@ jobs:
       - CPU_COUNT=4
 
     # Linux environments to build the scikit-learn wheels for the ARM64
-    # architecture and Python 3.6 and newer. This is used both at release time
+    # architecture and Python 3.7 and newer. This is used both at release time
     # with the manual trigger in the commit message in the release branch and as
     # a scheduled task to build the weekly dev build on the main branch. The
     # weekly frequency is meant to avoid depleting the Travis CI credits too
     # fast.
-    - python: 3.6
-      os: linux
-      arch: arm64
-      if: type = cron or commit_message =~ /\[cd build\]/
-      env:
-        - BUILD_WHEEL=true
-        - CIBW_BUILD=cp36-manylinux_aarch64
-
     - python: 3.7
       os: linux
       arch: arm64
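Note: together with the CircleCI image bumps above, removing the cp36 ARM64 wheel entry completes the drop of Python 3.6 from the build matrix. For illustration, a hypothetical runtime guard mirroring the package's `python_requires=">=3.7"` metadata (not part of this patch; pip enforces the real constraint from the package metadata at install time):

    import sys

    # Hypothetical guard mirroring python_requires=">=3.7"; shown only to
    # make the new floor concrete, not taken from this patch.
    if sys.version_info < (3, 7):
        raise RuntimeError(
            "scikit-learn 1.0 requires Python 3.7 or later; running "
            + sys.version.split()[0]
        )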
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 412de99f5e57d..31baf41ff4cb1 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -11,7 +11,7 @@ jobs:
 - job: git_commit
   displayName: Get Git Commit
   pool:
-    vmImage: ubuntu-18.04
+    vmImage: ubuntu-20.04
   steps:
     - bash: |
         set -ex
@@ -38,7 +38,7 @@ jobs:
     )
   displayName: Linting
   pool:
-    vmImage: ubuntu-18.04
+    vmImage: ubuntu-20.04
   steps:
   - task: UsePythonVersion@0
     inputs:
@@ -57,7 +57,7 @@ jobs:
 - template: build_tools/azure/posix.yml
   parameters:
     name: Linux_Nightly
-    vmImage: ubuntu-18.04
+    vmImage: ubuntu-20.04
     dependsOn: [git_commit, linting]
     condition: |
       and(
@@ -83,7 +83,7 @@ jobs:
 - template: build_tools/azure/posix.yml
   parameters:
     name: Linux_Nightly_ICC
-    vmImage: ubuntu-18.04
+    vmImage: ubuntu-20.04
     dependsOn: [git_commit, linting]
     condition: |
       and(
@@ -105,7 +105,7 @@ jobs:
 - template: build_tools/azure/posix.yml
   parameters:
     name: Linux_Runs
-    vmImage: ubuntu-18.04
+    vmImage: ubuntu-20.04
     dependsOn: [git_commit]
     condition: |
       and(
@@ -119,10 +119,30 @@ jobs:
         BLAS: 'mkl'
         COVERAGE: 'true'
 
+# Check compilation with Ubuntu bionic 18.04 LTS and scipy from conda-forge
 - template: build_tools/azure/posix.yml
   parameters:
-    name: Linux
+    name: Ubuntu_Bionic
     vmImage: ubuntu-18.04
+    dependsOn: [git_commit, linting]
+    condition: |
+      and(
+        succeeded(),
+        not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')),
+        ne(variables['Build.Reason'], 'Schedule')
+      )
+    matrix:
+      py37_conda:
+        DISTRIB: 'conda'
+        PYTHON_VERSION: '3.7'
+        BLAS: 'openblas'
+        COVERAGE: 'false'
+        BUILD_WITH_ICC: 'false'
+
+- template: build_tools/azure/posix.yml
+  parameters:
+    name: Linux
+    vmImage: ubuntu-20.04
     dependsOn: [linting, git_commit]
     condition: |
       and(
@@ -132,32 +152,23 @@ jobs:
       )
     matrix:
       # Linux environment to test that scikit-learn can be built against
-      # versions of numpy, scipy with ATLAS that comes with Ubuntu Bionic 18.04
-      # i.e. numpy 1.13.3 and scipy 0.19
-      py36_ubuntu_atlas:
+      # versions of numpy, scipy with ATLAS that comes with Ubuntu Focal 20.04
+      # i.e. numpy 1.17.4 and scipy 1.3.3
+      ubuntu_atlas:
         DISTRIB: 'ubuntu'
-        PYTHON_VERSION: '3.6'
         JOBLIB_VERSION: 'min'
         PANDAS_VERSION: 'none'
         THREADPOOLCTL_VERSION: 'min'
-        PYTEST_VERSION: 'min'
-        PYTEST_XDIST_VERSION: 'none'
         COVERAGE: 'false'
-      # Linux + Python 3.6 build with OpenBLAS and without SITE_JOBLIB
-      py36_conda_openblas:
+      # Linux + Python 3.7 build with OpenBLAS and without SITE_JOBLIB
+      py37_conda_openblas:
         DISTRIB: 'conda'
-        PYTHON_VERSION: '3.6'
+        PYTHON_VERSION: '3.7'
         BLAS: 'openblas'
         NUMPY_VERSION: 'min'
         SCIPY_VERSION: 'min'
         MATPLOTLIB_VERSION: 'min'
-        # latest version of joblib available in conda for Python 3.6
-        JOBLIB_VERSION: '0.13.2'
         THREADPOOLCTL_VERSION: '2.0.0'
-        # temporary pin pytest due to unknown failure with pytest 5.4 and
-        # python 3.6
-        PYTEST_VERSION: 'min'
-        PYTEST_XDIST_VERSION: 'none'
       # Linux environment to test the latest available dependencies and MKL.
       # It runs tests requiring lightgbm, pandas and PyAMG.
       pylatest_pip_openblas_pandas:
@@ -171,7 +182,7 @@ jobs:
 - template: build_tools/azure/posix-32.yml
   parameters:
     name: Linux32
-    vmImage: ubuntu-18.04
+    vmImage: ubuntu-20.04
     dependsOn: [linting, git_commit]
     condition: |
       and(
@@ -180,14 +191,11 @@ jobs:
         ne(variables['Build.Reason'], 'Schedule')
       )
     matrix:
-      py36_ubuntu_atlas_32bit:
-        DISTRIB: 'ubuntu-32'
-        PYTHON_VERSION: '3.6'
+      debian_atlas_32bit:
+        DISTRIB: 'debian-32'
         JOBLIB_VERSION: 'min'
         # disable pytest xdist due to unknown bug with 32-bit container
         PYTEST_XDIST_VERSION: 'none'
-        # temporary pin pytest due to unknown failure with pytest 5.4 and
-        # python 3.6
         PYTEST_VERSION: 'min'
         THREADPOOLCTL_VERSION: 'min'
 
@@ -231,6 +239,6 @@ jobs:
         PYTHON_ARCH: '64'
         PYTEST_VERSION: '*'
         COVERAGE: 'true'
-      py36_pip_openblas_32bit:
-        PYTHON_VERSION: '3.6'
+      py37_pip_openblas_32bit:
+        PYTHON_VERSION: '3.7'
         PYTHON_ARCH: '32'
diff --git a/build_tools/azure/install.sh b/build_tools/azure/install.sh
index d2711d6bd610e..048ffe300ee2a 100755
--- a/build_tools/azure/install.sh
+++ b/build_tools/azure/install.sh
@@ -70,9 +70,9 @@ elif [[ "$DISTRIB" == "ubuntu" ]]; then
     python -m pip install $(get_dep cython $CYTHON_VERSION) \
                           $(get_dep joblib $JOBLIB_VERSION)
 
-elif [[ "$DISTRIB" == "ubuntu-32" ]]; then
+elif [[ "$DISTRIB" == "debian-32" ]]; then
     apt-get update
-    apt-get install -y python3-dev python3-scipy python3-matplotlib libatlas3-base libatlas-base-dev python3-virtualenv python3-pandas ccache
+    apt-get install -y python3-dev python3-numpy python3-scipy python3-matplotlib libatlas3-base libatlas-base-dev python3-virtualenv python3-pandas ccache
 
     python3 -m virtualenv --system-site-packages --python=python3 $VIRTUALENV
     source $VIRTUALENV/bin/activate
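Note: the 32-bit CI moves from the i386/ubuntu:18.04 image to i386/debian:10.9, presumably because Ubuntu stopped publishing full i386 images after 18.04 while Debian still maintains a complete 32-bit port; python3-numpy is now installed from apt as well. An illustrative snippet (not part of the patch) to confirm the bitness the tests actually see inside such a container:

    import platform
    import struct

    # Inside an i386 container on an x86_64 host, the kernel (and hence
    # platform.machine()) may still report the host architecture; the
    # interpreter's pointer size reflects the bitness that matters here.
    bits = struct.calcsize("P") * 8
    print(platform.machine(), f"-> {bits}-bit Python")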
diff --git a/build_tools/azure/posix-32.yml b/build_tools/azure/posix-32.yml
index 5e4689a2505e5..039236a70fbe5 100644
--- a/build_tools/azure/posix-32.yml
+++ b/build_tools/azure/posix-32.yml
@@ -45,7 +45,7 @@ jobs:
           -w /io
           --detach
           --name skcontainer
-          -e DISTRIB=ubuntu-32
+          -e DISTRIB=debian-32
           -e TEST_DIR=/temp_dir
           -e JUNITXML=$JUNITXML
           -e VIRTUALENV=testvenv
@@ -63,7 +63,7 @@ jobs:
           -e OMP_NUM_THREADS=$OMP_NUM_THREADS
           -e OPENBLAS_NUM_THREADS=$OPENBLAS_NUM_THREADS
           -e SKLEARN_SKIP_NETWORK_TESTS=$SKLEARN_SKIP_NETWORK_TESTS
-          i386/ubuntu:18.04
+          i386/debian:10.9
           sleep 1000000
         displayName: 'Start container'
     - script: >
diff --git a/build_tools/azure/test_script.sh b/build_tools/azure/test_script.sh
index 858d691b38216..6e05d7d858e52 100755
--- a/build_tools/azure/test_script.sh
+++ b/build_tools/azure/test_script.sh
@@ -4,7 +4,7 @@ set -e
 
 if [[ "$DISTRIB" =~ ^conda.* ]]; then
     source activate $VIRTUALENV
-elif [[ "$DISTRIB" == "ubuntu" ]] || [[ "$DISTRIB" == "ubuntu-32" ]]; then
+elif [[ "$DISTRIB" == "ubuntu" ]] || [[ "$DISTRIB" == "debian-32" ]]; then
     source $VIRTUALENV/bin/activate
 fi
 
diff --git a/doc/conftest.py b/doc/conftest.py
index 5468184bf5509..a2770e5d36a10 100644
--- a/doc/conftest.py
+++ b/doc/conftest.py
@@ -7,6 +7,7 @@
 from sklearn.utils import IS_PYPY
 from sklearn.utils._testing import SkipTest
 from sklearn.utils._testing import check_skip_network
+from sklearn.utils.fixes import parse_version
 from sklearn.datasets import get_data_home
 from sklearn.datasets._base import _pkl_filepath
 from sklearn.datasets._twenty_newsgroups import CACHE_NAME
@@ -80,6 +81,10 @@ def setup_grid_search():
 
 def setup_preprocessing():
     try:
         import pandas  # noqa
+        if parse_version(pandas.__version__) < parse_version('1.1.0'):
+            raise SkipTest(
+                "Skipping preprocessing.rst, pandas version < 1.1.0"
+            )
     except ImportError:
         raise SkipTest("Skipping preprocessing.rst, pandas not installed")
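Note: the conftest change now skips the preprocessing.rst doctests unless pandas >= 1.1.0 is importable, rather than only checking that pandas is installed. The same pattern generalises to any optional dependency; a sketch with a hypothetical helper name (not part of the patch):

    from sklearn.utils._testing import SkipTest
    from sklearn.utils.fixes import parse_version


    def require_package(name, min_version=None):
        # Hypothetical helper: skip a doctest module when a dependency
        # is missing or older than the documented behaviour requires.
        try:
            module = __import__(name)
        except ImportError:
            raise SkipTest(f"{name} not installed")
        if min_version is not None and (
                parse_version(module.__version__)
                < parse_version(min_version)):
            raise SkipTest(f"{name} older than {min_version}")
        return module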
diff --git a/doc/modules/sgd.rst b/doc/modules/sgd.rst
index 0a1d8407e64ae..0b618289b84ec 100644
--- a/doc/modules/sgd.rst
+++ b/doc/modules/sgd.rst
@@ -130,7 +130,7 @@ Using ``loss="log"`` or ``loss="modified_huber"`` enables the
 :math:`P(y|x)` per sample :math:`x`::
 
     >>> clf = SGDClassifier(loss="log", max_iter=5).fit(X, y)
-    >>> clf.predict_proba([[1., 1.]])
+    >>> clf.predict_proba([[1., 1.]])  # doctest: +SKIP
     array([[0.00..., 0.99...]])
 
 The concrete penalty can be set via the ``penalty`` parameter.
diff --git a/doc/tutorial/statistical_inference/supervised_learning.rst b/doc/tutorial/statistical_inference/supervised_learning.rst
index 3d87830fa0b26..e326b614472de 100644
--- a/doc/tutorial/statistical_inference/supervised_learning.rst
+++ b/doc/tutorial/statistical_inference/supervised_learning.rst
@@ -173,7 +173,7 @@ Linear models: :math:`y = X\beta + \epsilon`
     >>> regr = linear_model.LinearRegression()
     >>> regr.fit(diabetes_X_train, diabetes_y_train)
     LinearRegression()
-    >>> print(regr.coef_)
+    >>> print(regr.coef_)  # doctest: +SKIP
     [   0.30349955 -237.63931533  510.53060544  327.73698041 -814.13170937
       492.81458798  102.84845219  184.60648906  743.51961675   76.09517222]
 
diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst
index f94e7001fdc97..87b0441bade5f 100644
--- a/doc/whats_new/v1.0.rst
+++ b/doc/whats_new/v1.0.rst
@@ -12,6 +12,12 @@ Version 1.0.0
 
 .. include:: changelog_legend.inc
 
+Minimal dependencies
+--------------------
+
+Version 1.0.0 of scikit-learn requires python 3.7+, numpy 1.14.5+ and
+scipy 1.1.0+. Optional minimal dependency is matplotlib 2.2.2+.
+
 Enforcing keyword-only arguments
 --------------------------------
 
diff --git a/pyproject.toml b/pyproject.toml
index c55c68b3182b8..84468f65341da 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,5 +11,5 @@ requires = [
     # see: https://github.com/scipy/oldest-supported-numpy/blob/master/setup.cfg
     "oldest-supported-numpy",
 
-    "scipy>=0.19.1",
+    "scipy>=1.1.0",
 ]
diff --git a/sklearn/_min_dependencies.py b/sklearn/_min_dependencies.py
index 56d44586cdc6d..aa01b7fdfa352 100644
--- a/sklearn/_min_dependencies.py
+++ b/sklearn/_min_dependencies.py
@@ -5,12 +5,11 @@
 # numpy scipy and cython should by in sync with pyproject.toml
 
 if platform.python_implementation() == 'PyPy':
-    SCIPY_MIN_VERSION = '1.1.0'
     NUMPY_MIN_VERSION = '1.19.0'
 else:
-    SCIPY_MIN_VERSION = '0.19.1'
-    NUMPY_MIN_VERSION = '1.13.3'
+    NUMPY_MIN_VERSION = '1.14.5'
 
+SCIPY_MIN_VERSION = '1.1.0'
 JOBLIB_MIN_VERSION = '0.11'
 THREADPOOLCTL_MIN_VERSION = '2.0.0'
 PYTEST_MIN_VERSION = '5.0.1'
@@ -26,9 +25,9 @@
     'joblib': (JOBLIB_MIN_VERSION, 'install'),
     'threadpoolctl': (THREADPOOLCTL_MIN_VERSION, 'install'),
     'cython': (CYTHON_MIN_VERSION, 'build'),
-    'matplotlib': ('2.1.1', 'benchmark, docs, examples, tests'),
-    'scikit-image': ('0.13', 'docs, examples, tests'),
-    'pandas': ('0.25.0', 'benchmark, docs, examples, tests'),
+    'matplotlib': ('2.2.2', 'benchmark, docs, examples, tests'),
+    'scikit-image': ('0.14', 'docs, examples, tests'),
+    'pandas': ('0.23.4', 'benchmark, docs, examples, tests'),
     'seaborn': ('0.9.0', 'docs, examples'),
     'memory_profiler': ('0.57.0', 'benchmark, docs'),
     'pytest': (PYTEST_MIN_VERSION, 'tests'),
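Note: minimum versions stay centralised in sklearn/_min_dependencies.py, with pyproject.toml kept in sync for the build-time scipy floor. SciPy's minimum is now 1.1.0 on all implementations, matplotlib and scikit-image move up, and pandas is relaxed to 0.23.4. Assuming the dict shown in the hunk above is the module-level `dependent_packages` mapping, as in the scikit-learn source, the floors can be listed directly:

    # Assumes sklearn/_min_dependencies.py exposes `dependent_packages`,
    # a dict mapping package name -> (min_version, extras tags).
    from sklearn._min_dependencies import dependent_packages

    for package, (min_version, extras) in sorted(dependent_packages.items()):
        print(f"{package:>16} >= {min_version:<8} ({extras})")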
diff --git a/sklearn/decomposition/_truncated_svd.py b/sklearn/decomposition/_truncated_svd.py
index 74239567dee48..7aa36c59da00e 100644
--- a/sklearn/decomposition/_truncated_svd.py
+++ b/sklearn/decomposition/_truncated_svd.py
@@ -87,18 +87,21 @@ class TruncatedSVD(TransformerMixin, BaseEstimator):
     Examples
     --------
     >>> from sklearn.decomposition import TruncatedSVD
-    >>> from scipy.sparse import random as sparse_random
-    >>> X = sparse_random(100, 100, density=0.01, format='csr',
-    ...                   random_state=42)
+    >>> from scipy.sparse import csr_matrix
+    >>> import numpy as np
+    >>> np.random.seed(0)
+    >>> X_dense = np.random.rand(100, 100)
+    >>> X_dense[:, 2 * np.arange(50)] = 0
+    >>> X = csr_matrix(X_dense)
     >>> svd = TruncatedSVD(n_components=5, n_iter=7, random_state=42)
     >>> svd.fit(X)
     TruncatedSVD(n_components=5, n_iter=7, random_state=42)
     >>> print(svd.explained_variance_ratio_)
-    [0.0646... 0.0633... 0.0639... 0.0535... 0.0406...]
+    [0.0157... 0.0512... 0.0499... 0.0479... 0.0453...]
     >>> print(svd.explained_variance_ratio_.sum())
-    0.286...
+    0.2102...
     >>> print(svd.singular_values_)
-    [1.553... 1.512... 1.510... 1.370... 1.199...]
+    [35.2410... 4.5981... 4.5420... 4.4486... 4.3288...]
 
     See Also
     --------
diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_loss.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_loss.py
index 345e72c642668..9f4294a101700 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_loss.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_loss.py
@@ -98,7 +98,7 @@ def fprime2(x: np.ndarray) -> np.ndarray:
     optimum = optimum.ravel()
     assert_allclose(loss.inverse_link_function(optimum), y_true)
     assert_allclose(func(optimum), 0, atol=1e-14)
-    assert_allclose(get_gradients(y_true, optimum), 0, atol=1e-7)
+    assert_allclose(get_gradients(y_true, optimum), 0, atol=1e-6)
 
 
 @pytest.mark.parametrize('loss, n_classes, prediction_dim', [
diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py
index 66f7d9ae77687..c244d6f6caffc 100644
--- a/sklearn/utils/tests/test_validation.py
+++ b/sklearn/utils/tests/test_validation.py
@@ -24,7 +24,7 @@
 from sklearn.utils import check_X_y
 from sklearn.utils import deprecated
 from sklearn.utils._mocking import MockDataFrame
-from sklearn.utils.fixes import np_version, parse_version
+from sklearn.utils.fixes import parse_version
 from sklearn.utils.estimator_checks import _NotAnArray
 from sklearn.random_projection import _sparse_random_matrix
 from sklearn.linear_model import ARDRegression
@@ -49,7 +49,6 @@
     _num_features,
     FLOAT_DTYPES)
 from sklearn.utils.validation import _check_fit_params
-from sklearn.utils.fixes import parse_version
 import sklearn
 
 
@@ -345,7 +344,7 @@ def test_check_array():
     assert isinstance(result, np.ndarray)
 
 
-# TODO: Check for error in 1.1 when implicit conversation is removed
+# TODO: Check for error in 1.1 when implicit conversion is removed
 @pytest.mark.parametrize("X", [
     [['1', '2'], ['3', '4']],
     np.array([['1', '2'], ['3', '4']], dtype='U'),
@@ -368,14 +367,10 @@ def test_check_array_numeric_warns(X):
     [['11', '12'], ['13', 'xx']],
     np.array([['11', '12'], ['13', 'xx']], dtype='U'),
     np.array([['11', '12'], ['13', 'xx']], dtype='S'),
-    [[b'a', b'b'], [b'c', b'd']],
-    np.array([[b'a', b'b'], [b'c', b'd']], dtype='V1')
+    [[b'a', b'b'], [b'c', b'd']]
 ])
 def test_check_array_dtype_numeric_errors(X):
     """Error when string-ike array can not be converted"""
-    if (np_version < parse_version("1.14")
-            and hasattr(X, "dtype") and X.dtype.kind == "V"):
-        pytest.skip("old numpy would convert V dtype into float silently")
     expected_warn_msg = "Unable to convert array of bytes/strings"
     with pytest.raises(ValueError, match=expected_warn_msg):
         check_array(X, dtype="numeric")
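Note: with numpy >= 1.14.5 guaranteed, the old-numpy skip and the `V1` (void dtype) parametrisation are dropped from test_check_array_dtype_numeric_errors; every remaining case must raise. For illustration, the error path these cases exercise (snippet not part of the patch):

    import numpy as np
    from sklearn.utils import check_array

    # Byte-string data cannot be coerced to decimal numbers, so
    # dtype="numeric" is expected to raise rather than silently cast.
    X = np.array([[b'a', b'b'], [b'c', b'd']])
    try:
        check_array(X, dtype="numeric")
    except ValueError as exc:
        print(exc)  # "Unable to convert array of bytes/strings ..."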