scikit-learn-contrib · glemaitre · Oct 31, 2019 · Oct 29, 2019 · Oct 29, 2019 · Oct 29, 2019
diff --git a/.readthedocs.yml b/.readthedocs.yml
@@ -4,7 +4,7 @@ formats:
   - none
 requirements_file: requirements.txt
 python:
-  version: 3.6
+  version: 3.7
   pip_install: true
   extra_requirements:
     - tests

diff --git a/.travis.yml b/.travis.yml
@@ -1,5 +1,5 @@
 # make it explicit that we favor the new container-based travis workers
-dist: xenial
+dist: bionic
 sudo: false
 
 language: python
@@ -32,16 +32,12 @@ matrix:
     # Ubuntu 14.04 environment
     - env: DISTRIB="ubuntu"
     # Latest release
-    - env: DISTRIB="conda" PYTHON_VERSION="3.6"
-           NUMPY_VERSION="*" SCIPY_VERSION="*" SKLEARN_VERSION="0.21.2"
-           OPTIONAL_DEPS="true"
     - env: DISTRIB="conda" PYTHON_VERSION="3.7"
-           NUMPY_VERSION="*" SCIPY_VERSION="*" SKLEARN_VERSION="0.21.2"
-           OPTIONAL_DEPS="false"
+           NUMPY_VERSION="*" SCIPY_VERSION="*" SKLEARN_VERSION="master"
+           OPTIONAL_DEPS="keras"
     - env: DISTRIB="conda" PYTHON_VERSION="3.7"
            NUMPY_VERSION="*" SCIPY_VERSION="*" SKLEARN_VERSION="master"
-           OPTIONAL_DEPS="false"
-  allow_failures:
+           OPTIONAL_DEPS="tensorflow"
     - env: DISTRIB="conda" PYTHON_VERSION="3.7"
            NUMPY_VERSION="*" SCIPY_VERSION="*" SKLEARN_VERSION="master"
            OPTIONAL_DEPS="false"

diff --git a/README.rst b/README.rst
@@ -162,10 +162,9 @@ Below is a list of the methods currently implemented in this module.
     2. SMOTE + ENN [11]_
 
 * Ensemble classifier using samplers internally
-    1. EasyEnsemble [13]_
-    2. BalanceCascade [13]_
-    3. Balanced Random Forest [16]_
-    4. Balanced Bagging
+    1. Easy Ensemble classifier [13]_
+    2. Balanced Random Forest [16]_
+    3. Balanced Bagging
 
 The different algorithms are presented in the sphinx-gallery_.
 

diff --git a/appveyor.yml b/appveyor.yml
@@ -10,7 +10,12 @@ environment:
     - PYTHON: "C:\\Miniconda36-x64"
       PYTHON_VERSION: "3.6"
       PYTHON_ARCH: "64"
-      OPTIONAL_DEP: "pandas keras tensorflow"
+      OPTIONAL_DEP: "pandas keras tensorflow=1"
+
+    - PYTHON: "C:\\Miniconda36-x64"
+      PYTHON_VERSION: "3.6"
+      PYTHON_ARCH: "64"
+      OPTIONAL_DEP: "pandas tensorflow"
 
     - PYTHON: "C:\\Miniconda36-x64"
       PYTHON_VERSION: "3.7"
@@ -30,8 +35,8 @@ install:
   - conda update conda -y -q
   - conda create -n testenv --yes python=%PYTHON_VERSION% pip
   - activate testenv
-  - conda install scipy numpy -y -q
-  - conda install scikit-learn -y -q
+  - conda install scipy numpy joblib -y -q
+  - pip install --pre -f https://sklearn-nightly.scdn8.secure.raxcdn.com scikit-learn
   - conda install %OPTIONAL_DEP% -y -q
   - conda install pytest pytest-cov -y -q
   - pip install codecov

diff --git a/build_tools/circle/build_doc.sh b/build_tools/circle/build_doc.sh
@@ -88,13 +88,14 @@ conda update --yes --quiet conda
 
 # Configure the conda environment and put it in the path using the
 # provided versions
-conda create -n $CONDA_ENV_NAME --yes --quiet python=3.6
+conda create -n $CONDA_ENV_NAME --yes --quiet python=3.7
 source activate $CONDA_ENV_NAME
 
-conda install --yes pip numpy scipy pillow matplotlib sphinx \
-      sphinx_rtd_theme numpydoc pandas keras
-pip install --pre scikit-learn
+conda install --yes pip numpy scipy joblib pillow matplotlib sphinx \
+      memory_profiler sphinx_rtd_theme pandas keras tensorflow=1
+pip install --pre -f https://sklearn-nightly.scdn8.secure.raxcdn.com scikit-learn
 pip install -U git+https://github.com/sphinx-gallery/sphinx-gallery.git
+pip install -U git+https://github.com/numpy/numpydoc.git
 
 # Build and install imbalanced-learn in dev mode
 ls -l

diff --git a/build_tools/travis/install.sh b/build_tools/travis/install.sh
@@ -28,27 +28,24 @@ if [[ "$DISTRIB" == "conda" ]]; then
     MINICONDA_PATH=/home/travis/miniconda
     chmod +x miniconda.sh && ./miniconda.sh -b -p $MINICONDA_PATH
     export PATH=$MINICONDA_PATH/bin:$PATH
-    conda config --set always_yes yes --set changeps1 no
-    conda install conda=4.6
-    conda update -q conda
-    conda info -a
 
     # Configure the conda environment and put it in the path using the
     # provided versions
     conda create -n testenv --yes python=$PYTHON_VERSION pip
     source activate testenv
     conda install --yes numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION
 
-    if [[ "$OPTIONAL_DEPS" == "true" ]]; then
-        conda install --yes pandas keras tensorflow
+    if [[ "$OPTIONAL_DEPS" == "keras" ]]; then
+        conda install --yes pandas keras tensorflow=1
         KERAS_BACKEND=tensorflow
         python -c "import keras.backend"
         sed -i -e 's/"backend":[[:space:]]*"[^"]*/"backend":\ "'$KERAS_BACKEND'/g' ~/.keras/keras.json;
+    elif [[ "$OPTIONAL_DEPS" == "tensorflow" ]]; then
+        conda install --yes pandas tensorflow
     fi
 
     if [[ "$SKLEARN_VERSION" == "master" ]]; then
-        conda install --yes cython
-        pip install -U git+https://github.com/scikit-learn/scikit-learn.git
+        pip install --pre -f https://sklearn-nightly.scdn8.secure.raxcdn.com scikit-learn
     else
         conda install --yes scikit-learn=$SKLEARN_VERSION
     fi
@@ -65,8 +62,8 @@ elif [[ "$DISTRIB" == "ubuntu" ]]; then
     virtualenv --system-site-packages --python=python3 testvenv
     source testvenv/bin/activate
 
-    pip3 install scikit-learn
-    pip3 install pandas keras tensorflow
+    pip install --pre -f https://sklearn-nightly.scdn8.secure.raxcdn.com scikit-learn
+    pip3 install pandas
     pip3 install pytest pytest-cov codecov sphinx numpydoc
 
 fi

diff --git a/doc/api.rst b/doc/api.rst
@@ -111,10 +111,8 @@ Prototype selection
    :toctree: generated/
    :template: class.rst
 
-   ensemble.BalanceCascade
    ensemble.BalancedBaggingClassifier
    ensemble.BalancedRandomForestClassifier
-   ensemble.EasyEnsemble
    ensemble.EasyEnsembleClassifier
    ensemble.RUSBoostClassifier
 
@@ -251,5 +249,4 @@ Imbalance-learn provides some fast-prototyping tools.
 
    utils.estimator_checks.check_estimator
    utils.check_neighbors_object
-   utils.check_ratio
    utils.check_sampling_strategy
diff --git a/doc/conf.py b/doc/conf.py
@@ -265,6 +265,7 @@
 sphinx_gallery_conf = {
     'doc_module': 'imblearn',
     'backreferences_dir': os.path.join('generated'),
+    'show_memory': True,
     'reference_url': {
         'imblearn': None}
 }

diff --git a/doc/whats_new/v0.6.rst b/doc/whats_new/v0.6.rst
@@ -6,3 +6,46 @@ Version 0.6.0 (under-development)
 Changelog
 ---------
 
+Changed models
+..............
+
+The following models might give some different sampling due to changes in
+scikit-learn:
+
+- :class:`imblearn.under_sampling.ClusterCentroids`
+- :class:`imblearn.under_sampling.InstanceHardnessThreshold`
+
+Maintenance
+...........
+
+- Update imports from scikit-learn after some modules have been made private.
+  The following imports have been changed:
+  :class:`sklearn.ensemble._base._set_random_states`,
+  :class:`sklearn.ensemble._forest._parallel_build_trees`,
+  :class:`sklearn.metrics._classification._check_targets`,
+  :class:`sklearn.metrics._classification._prf_divide`,
+  :class:`sklearn.utils.Bunch`,
+  :class:`sklearn.utils._safe_indexing`,
+  :class:`sklearn.utils._testing.assert_allclose`,
+  :class:`sklearn.utils._testing.assert_array_equal`,
+  :class:`sklearn.utils._testing.SkipTest`.
+  :pr:`617` by :user:`Guillaume Lemaitre <glemaitre>`.
+
+Deprecation
+...........
+
+- The following classes have been removed after 2 deprecation cycles:
+  `ensemble.BalanceCascade` and `ensemble.EasyEnsemble`.
+  :pr:`617` by :user:`Guillaume Lemaitre <glemaitre>`.
+
+- The following functions have been removed after 2 deprecation cycles:
+  `utils.check_ratio`.
+  :pr:`617` by :user:`Guillaume Lemaitre <glemaitre>`.
+
+- The parameters `ratio` and `return_indices` have been removed from all
+  samplers.
+  :pr:`617` by :user:`Guillaume Lemaitre <glemaitre>`.
+
+- The parameters `m_neighbors`, `out_step`, `kind`, `svm_estimator`
+  have been removed from the :class:`imblearn.over_sampling.SMOTE`.
+  :pr:`617` by :user:`Guillaume Lemaitre <glemaitre>`.
diff --git a/examples/ensemble/plot_comparison_ensemble_classifier.py b/examples/ensemble/plot_comparison_ensemble_classifier.py
@@ -111,9 +111,8 @@ def plot_confusion_matrix(cm, classes, ax,
 # will use a bagging classifier and its counter part which internally uses a
 # random under-sampling to balanced each boostrap sample.
 
-bagging = BaggingClassifier(n_estimators=50, random_state=0, n_jobs=-1)
-balanced_bagging = BalancedBaggingClassifier(n_estimators=50, random_state=0,
-                                             n_jobs=-1)
+bagging = BaggingClassifier(n_estimators=50, random_state=0)
+balanced_bagging = BalancedBaggingClassifier(n_estimators=50, random_state=0)
 
 bagging.fit(X_train, y_train)
 balanced_bagging.fit(X_train, y_train)
@@ -149,9 +148,8 @@ def plot_confusion_matrix(cm, classes, ax,
 # outperforming bagging. Here, we used a vanilla random forest and its balanced
 # counterpart in which each bootstrap sample is balanced.
 
-rf = RandomForestClassifier(n_estimators=50, random_state=0, n_jobs=-1)
-brf = BalancedRandomForestClassifier(n_estimators=50, random_state=0,
-                                     n_jobs=-1)
+rf = RandomForestClassifier(n_estimators=50, random_state=0)
+brf = BalancedRandomForestClassifier(n_estimators=50, random_state=0)
 
 rf.fit(X_train, y_train)
 brf.fit(X_train, y_train)
@@ -189,8 +187,7 @@ def plot_confusion_matrix(cm, classes, ax,
 
 base_estimator = AdaBoostClassifier(n_estimators=10)
 eec = EasyEnsembleClassifier(n_estimators=10,
-                             base_estimator=base_estimator,
-                             n_jobs=-1)
+                             base_estimator=base_estimator)
 eec.fit(X_train, y_train)
 y_pred_eec = eec.predict(X_test)
 print('Easy ensemble classifier performance:')

diff --git a/imblearn/__init__.py b/imblearn/__init__.py
@@ -46,6 +46,17 @@
 from ._version import __version__
 from .utils._show_versions import show_versions
 
-__all__ = ['combine', 'ensemble', 'exceptions', 'keras', 'metrics',
-           'over_sampling', 'tensorflow', 'under_sampling',
-           'utils', 'pipeline', 'FunctionSampler', '__version__']
+__all__ = [
+    "combine",
+    "ensemble",
+    "exceptions",
+    "keras",
+    "metrics",
+    "over_sampling",
+    "tensorflow",
+    "under_sampling",
+    "utils",
+    "pipeline",
+    "FunctionSampler",
+    "__version__",
+]
diff --git a/imblearn/_version.py b/imblearn/_version.py
@@ -22,4 +22,4 @@
 # 'X.Y.dev0' is the canonical version of 'X.Y.dev'
 #
 
-__version__ = '0.6.0.dev0'
+__version__ = "0.6.0.dev0"
diff --git a/imblearn/base.py b/imblearn/base.py
@@ -4,7 +4,6 @@
 #          Christos Aridas
 # License: MIT
 
-import warnings
 from abc import ABCMeta, abstractmethod
 
 import numpy as np
@@ -15,7 +14,6 @@
 from sklearn.utils.multiclass import check_classification_targets
 
 from .utils import check_sampling_strategy, check_target_type
-from .utils.deprecation import deprecate_parameter
 
 
 class SamplerMixin(BaseEstimator, metaclass=ABCMeta):
@@ -25,7 +23,7 @@ class SamplerMixin(BaseEstimator, metaclass=ABCMeta):
     instead.
     """
 
-    _estimator_type = 'sampler'
+    _estimator_type = "sampler"
 
     def fit(self, X, y):
         """Check inputs and statistics of the sampler.
@@ -46,10 +44,10 @@ def fit(self, X, y):
             Return the instance itself.
 
         """
-        self._deprecate_ratio()
         X, y, _ = self._check_X_y(X, y)
         self.sampling_strategy_ = check_sampling_strategy(
-            self.sampling_strategy, y, self._sampling_type)
+            self.sampling_strategy, y, self._sampling_type
+        )
         return self
 
     def fit_resample(self, X, y):
@@ -73,13 +71,12 @@ def fit_resample(self, X, y):
             The corresponding label of `X_resampled`.
 
         """
-        self._deprecate_ratio()
-
         check_classification_targets(y)
         X, y, binarize_y = self._check_X_y(X, y)
 
         self.sampling_strategy_ = check_sampling_strategy(
-            self.sampling_strategy, y, self._sampling_type)
+            self.sampling_strategy, y, self._sampling_type
+        )
 
         output = self._fit_resample(X, y)
 
@@ -126,31 +123,15 @@ class BaseSampler(SamplerMixin):
     instead.
     """
 
-    def __init__(self, sampling_strategy='auto', ratio=None):
+    def __init__(self, sampling_strategy="auto"):
         self.sampling_strategy = sampling_strategy
-        # FIXME: remove in 0.6
-        self.ratio = ratio
 
     @staticmethod
     def _check_X_y(X, y):
         y, binarize_y = check_target_type(y, indicate_one_vs_all=True)
-        X, y = check_X_y(X, y, accept_sparse=['csr', 'csc'])
+        X, y = check_X_y(X, y, accept_sparse=["csr", "csc"])
         return X, y, binarize_y
 
-    @property
-    def ratio_(self):
-        # FIXME: remove in 0.6
-        warnings.warn("'ratio' and 'ratio_' are deprecated. Use "
-                      "'sampling_strategy' and 'sampling_strategy_' instead.",
-                      DeprecationWarning)
-        return self.sampling_strategy_
-
-    def _deprecate_ratio(self):
-        # both ratio and sampling_strategy should not be set
-        if self.ratio is not None:
-            deprecate_parameter(self, '0.4', 'ratio', 'sampling_strategy')
-            self.sampling_strategy = self.ratio
-
 
 def _identity(X, y):
     return X, y
@@ -219,7 +200,7 @@ class FunctionSampler(BaseSampler):
 
     """
 
-    _sampling_type = 'bypass'
+    _sampling_type = "bypass"
 
     def __init__(self, func=None, accept_sparse=True, kw_args=None):
         super().__init__()
@@ -228,8 +209,9 @@ def __init__(self, func=None, accept_sparse=True, kw_args=None):
         self.kw_args = kw_args
 
     def _fit_resample(self, X, y):
-        X, y = check_X_y(X, y, accept_sparse=['csr', 'csc']
-                         if self.accept_sparse else False)
+        X, y = check_X_y(
+            X, y, accept_sparse=["csr", "csc"] if self.accept_sparse else False
+        )
         func = _identity if self.func is None else self.func
         output = func(X, y, **(self.kw_args if self.kw_args else {}))
         return output
diff --git a/imblearn/combine/__init__.py b/imblearn/combine/__init__.py
@@ -5,4 +5,4 @@
 from ._smote_enn import SMOTEENN
 from ._smote_tomek import SMOTETomek
 
-__all__ = ['SMOTEENN', 'SMOTETomek']
+__all__ = ["SMOTEENN", "SMOTETomek"]