Merge branch 'master' into cierroronwarning · scikit-learn/scikit-learn@5ed1c9c · GitHub

Commit 5ed1c9c

Merge branch 'master' into cierroronwarning

2 parents a642e32 + 939fa3c

File tree

87 files changed (+362, -247 lines)


.gitignore

Lines changed: 31 additions & 1 deletion
@@ -75,7 +75,7 @@ _configtest.o.d
 # files generated from a template
 sklearn/utils/_seq_dataset.pyx
 sklearn/utils/_seq_dataset.pxd
-sklearn/linear_model/sag_fast.pyx
+sklearn/linear_model/_sag_fast.pyx

 # deprecated paths
 # TODO: Remove in 0.24
@@ -130,6 +130,36 @@ sklearn/svm/libsvm.py
 sklearn/svm/libsvm_sparse.py
 sklearn/svm/liblinear.py

+sklearn/linear_model/base.py
+sklearn/linear_model/bayes.py
+sklearn/linear_model/cd_fast.py
+sklearn/linear_model/coordinate_descent.py
+sklearn/linear_model/huber.py
+sklearn/linear_model/least_angle.py
+sklearn/linear_model/logistic.py
+sklearn/linear_model/omp.py
+sklearn/linear_model/passive_aggressive.py
+sklearn/linear_model/perceptron.py
+sklearn/linear_model/ransac.py
+sklearn/linear_model/ridge.py
+sklearn/linear_model/sag.py
+sklearn/linear_model/sag_fast.py
+sklearn/linear_model/sgd_fast.py
+sklearn/linear_model/stochastic_gradient.py
+sklearn/linear_model/theil_sen.py
+
+sklearn/metrics/cluster/bicluster.py
+sklearn/metrics/cluster/supervised.py
+sklearn/metrics/cluster/unsupervised.py
+sklearn/metrics/cluster/expected_mutual_info_fast.py
+
+sklearn/metrics/base.py
+sklearn/metrics/classification.py
+sklearn/metrics/regression.py
+sklearn/metrics/ranking.py
+sklearn/metrics/pairwise_fast.py
+sklearn/metrics/scorer.py
+
 sklearn/inspection/partial_dependence.py
 sklearn/inspection/permutation_importance.py
benchmarks/bench_rcv1_logreg_convergence.py

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@

 from sklearn.linear_model import (LogisticRegression, SGDClassifier)
 from sklearn.datasets import fetch_rcv1
-from sklearn.linear_model.sag import get_auto_step_size
+from sklearn.linear_model._sag import get_auto_step_size

 try:
     import lightning.classification as lightning_clf

benchmarks/bench_sparsify.py

Lines changed: 1 addition & 1 deletion
@@ -45,7 +45,7 @@

 from scipy.sparse.csr import csr_matrix
 import numpy as np
-from sklearn.linear_model.stochastic_gradient import SGDRegressor
+from sklearn.linear_model import SGDRegressor
 from sklearn.metrics import r2_score

 np.random.seed(42)

doc/developers/develop.rst

Lines changed: 1 addition & 1 deletion
@@ -453,7 +453,7 @@ this can be achieved with::
         return self.classes_[np.argmax(D, axis=1)]

 In linear models, coefficients are stored in an array called ``coef_``, and the
-independent term is stored in ``intercept_``. ``sklearn.linear_model.base``
+independent term is stored in ``intercept_``. ``sklearn.linear_model._base``
 contains a few base classes and mixins that implement common linear model
 patterns.
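For context on the convention this hunk touches, a minimal sketch (the toy data is invented here; only the ``coef_`` and ``intercept_`` attribute names come from the docs):

import numpy as np
from sklearn.linear_model import LinearRegression

# Toy data following y = 3*x + 1, so the fitted attributes should come
# out close to coef_ ~ [3.] and intercept_ ~ 1.0.
X = np.array([[0.0], [1.0], [2.0], [3.0]])
y = 3 * X.ravel() + 1

model = LinearRegression().fit(X, y)
print(model.coef_)       # per-feature coefficients
print(model.intercept_)  # independent term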

doc/modules/classes.rst

Lines changed: 1 addition & 15 deletions
@@ -882,7 +882,7 @@ Miscellaneous
    manifold.locally_linear_embedding
    manifold.smacof
    manifold.spectral_embedding
-   manifold.t_sne.trustworthiness
+   manifold.trustworthiness


 .. _metrics_ref:
@@ -1480,20 +1480,6 @@ Estimators

    svm.l1_min_c

-Low-level methods
------------------
-
-.. autosummary::
-   :toctree: generated
-   :template: function.rst
-
-   svm.libsvm.cross_validation
-   svm.libsvm.decision_function
-   svm.libsvm.fit
-   svm.libsvm.predict
-   svm.libsvm.predict_proba
-
-
 .. _tree_ref:

 :mod:`sklearn.tree`: Decision Trees

doc/modules/computing.rst

Lines changed: 1 addition & 1 deletion
@@ -297,7 +297,7 @@ decision function that is applied at prediction time is the same (a dot product)
 , so latency should be equivalent.

 Here is an example using
-:class:`sklearn.linear_model.stochastic_gradient.SGDClassifier` with the
+:class:`sklearn.linear_model.SGDClassifier` with the
 ``elasticnet`` penalty. The regularization strength is globally controlled by
 the ``alpha`` parameter. With a sufficiently high ``alpha``,
 one can then increase the ``l1_ratio`` parameter of ``elasticnet`` to
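A small sketch of the setup this passage describes, with illustrative parameter values and data (the alpha/l1_ratio numbers are not taken from the docs):

from sklearn.datasets import make_classification
from sklearn.linear_model import SGDClassifier

X, y = make_classification(n_samples=500, n_features=50, random_state=0)

# A sufficiently high alpha (global regularization strength) combined with
# a large l1_ratio drives many coefficients to exactly zero, which is what
# makes the elasticnet-penalized model cheap at prediction time.
clf = SGDClassifier(penalty='elasticnet', alpha=0.01, l1_ratio=0.9,
                    random_state=0).fit(X, y)
print((clf.coef_ == 0).mean())  # fraction of zeroed coefficients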

doc/modules/cross_validation.rst

Lines changed: 1 addition & 1 deletion
@@ -241,7 +241,7 @@ predefined scorer names::

 Or as a dict mapping scorer name to a predefined or custom scoring function::

-    >>> from sklearn.metrics.scorer import make_scorer
+    >>> from sklearn.metrics import make_scorer
     >>> scoring = {'prec_macro': 'precision_macro',
     ...            'rec_macro': make_scorer(recall_score, average='macro')}
     >>> scores = cross_validate(clf, X, y, scoring=scoring,
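The doctest in this hunk is cut off mid-call; a self-contained version of the same pattern might look like the following (estimator, data, and cv value chosen here purely for illustration):

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import make_scorer, recall_score
from sklearn.model_selection import cross_validate

X, y = load_iris(return_X_y=True)
clf = LogisticRegression(max_iter=1000)

# Mix a predefined scorer name with a custom scorer built via make_scorer.
scoring = {'prec_macro': 'precision_macro',
           'rec_macro': make_scorer(recall_score, average='macro')}
scores = cross_validate(clf, X, y, scoring=scoring, cv=5)
print(sorted(scores))  # fit_time, score_time, test_prec_macro, test_rec_macro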

doc/templates/index.html

Lines changed: 0 additions & 5 deletions
@@ -182,11 +182,6 @@ <h4 class="sk-landing-call-header">Community</h4>
 <li><strong>Gitter:</strong> <a href="https://gitter.im/scikit-learn/scikit-learn">gitter.im/scikit-learn</a></li>
 </ul>

-<form target="_top" id="paypal-form" method="post" action="https://www.paypal.com/cgi-bin/webscr">
-<input type="hidden" value="_s-xclick" name="cmd">
-<input type="hidden" value="74EYUMF3FTSW8" name="hosted_button_id">
-</form>
-<a class="btn btn-warning btn-big sk-donate-btn mb-1" onclick="document.getElementById('paypal-form').submit(); ">Help us, <strong>donate!</strong></a>
 <a class="btn btn-warning btn-big mb-1" href="about.html#citing-scikit-learn"><strong>Cite us!</strong></a>
 </div>
 <div class="col-md-4">

doc/themes/scikit-learn/static/nature.css_t

Lines changed: 0 additions & 5 deletions
@@ -967,11 +967,6 @@ div.container.index-lower ul li em {
   font-weight: bold;
 }

-#paypal-form {
-  margin: 30px 0;
-  padding: 0;
-}
-
 div.container.index-lower a.cite-us {
   margin-left: 60px;
   padding-right: 20px;

doc/whats_new/v0.21.rst

Lines changed: 1 addition & 1 deletion
@@ -228,7 +228,7 @@ random sampling procedures.
 - :func:`svm.SVC.decision_function` and
   :func:`multiclass.OneVsOneClassifier.decision_function`. |Fix|
 - :class:`linear_model.SGDClassifier` and any derived classifiers. |Fix|
-- Any model using the :func:`linear_model.sag.sag_solver` function with a `0`
+- Any model using the :func:`linear_model._sag.sag_solver` function with a `0`
   seed, including :class:`linear_model.LogisticRegression`,
   :class:`linear_model.LogisticRegressionCV`, :class:`linear_model.Ridge`,
   and :class:`linear_model.RidgeCV` with 'sag' solver. |Fix|

examples/applications/plot_model_complexity_influence.py

Lines changed: 1 addition & 1 deletion
@@ -31,7 +31,7 @@
 from sklearn.metrics import mean_squared_error
 from sklearn.svm import NuSVR
 from sklearn.ensemble import GradientBoostingRegressor
-from sklearn.linear_model.stochastic_gradient import SGDClassifier
+from sklearn.linear_model import SGDClassifier
 from sklearn.metrics import hamming_loss

 # #############################################################################

examples/linear_model/plot_sgd_penalties.py

Lines changed: 2 additions & 2 deletions

@@ -6,8 +6,8 @@
 Contours of where the penalty is equal to 1
 for the three penalties L1, L2 and elastic-net.

-All of the above are supported by
-:class:`sklearn.linear_model.stochastic_gradient`.
+All of the above are supported by :class:`~sklearn.linear_model.SGDClassifier`
+and :class:`~sklearn.linear_model.SGDRegressor`.

 """
 print(__doc__)

examples/preprocessing/plot_map_data_to_normal.py

Lines changed: 4 additions & 2 deletions
@@ -3,8 +3,10 @@
 Map data to a normal distribution
 =================================

+.. currentmodule:: sklearn.preprocessing
+
 This example demonstrates the use of the Box-Cox and Yeo-Johnson transforms
-through :class:`preprocessing.PowerTransformer` to map data from various
+through :class:`~PowerTransformer` to map data from various
 distributions to a normal distribution.

 The power transform is useful as a transformation in modeling problems where
@@ -22,7 +24,7 @@
 support inputs with negative values.

 For comparison, we also add the output from
-:class:`preprocessing.QuantileTransformer`. It can force any arbitrary
+:class:`~QuantileTransformer`. It can force any arbitrary
 distribution into a gaussian, provided that there are enough training samples
 (thousands). Because it is a non-parametric method, it is harder to interpret
 than the parametric ones (Box-Cox and Yeo-Johnson).
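For reference, the two transformers this example compares can be exercised as follows (the log-normal sample is generated here just to have skewed data; it is not part of the example file):

import numpy as np
from sklearn.preprocessing import PowerTransformer, QuantileTransformer

rng = np.random.RandomState(0)
X_skewed = rng.lognormal(size=(1000, 1))  # strictly positive, right-skewed

# Box-Cox needs strictly positive inputs; Yeo-Johnson also handles negatives.
pt = PowerTransformer(method='box-cox')
X_pt = pt.fit_transform(X_skewed)

# Non-parametric alternative: map through empirical quantiles to a normal
# distribution; it needs many training samples to work well.
qt = QuantileTransformer(output_distribution='normal', n_quantiles=1000)
X_qt = qt.fit_transform(X_skewed)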

sklearn/_build_utils/deprecated_modules.py

Lines changed: 55 additions & 0 deletions
@@ -86,6 +86,61 @@
      'set_verbosity_wrap'),
     ('_liblinear', 'sklearn.svm.liblinear', 'sklearn.svm', 'train_wrap'),

+    ('_base', 'sklearn.linear_model.base', 'sklearn.linear_model',
+     'LinearRegression'),
+    ('_cd_fast', 'sklearn.linear_model.cd_fast', 'sklearn.linear_model',
+     'sparse_enet_coordinate_descent'),
+    ('_bayes', 'sklearn.linear_model.bayes', 'sklearn.linear_model',
+     'BayesianRidge'),
+    ('_coordinate_descent', 'sklearn.linear_model.coordinate_descent',
+     'sklearn.linear_model', 'Lasso'),
+    ('_huber', 'sklearn.linear_model.huber', 'sklearn.linear_model',
+     'HuberRegressor'),
+    ('_least_angle', 'sklearn.linear_model.least_angle',
+     'sklearn.linear_model', 'LassoLarsCV'),
+    ('_logistic', 'sklearn.linear_model.logistic', 'sklearn.linear_model',
+     'LogisticRegression'),
+    ('_omp', 'sklearn.linear_model.omp', 'sklearn.linear_model',
+     'OrthogonalMatchingPursuit'),
+    ('_passive_aggressive', 'sklearn.linear_model.passive_aggressive',
+     'sklearn.linear_model', 'PassiveAggressiveClassifier'),
+    ('_perceptron', 'sklearn.linear_model.perceptron', 'sklearn.linear_model',
+     'Perceptron'),
+    ('_ransac', 'sklearn.linear_model.ransac', 'sklearn.linear_model',
+     'RANSACRegressor'),
+    ('_ridge', 'sklearn.linear_model.ridge', 'sklearn.linear_model',
+     'Ridge'),
+    ('_sag', 'sklearn.linear_model.sag', 'sklearn.linear_model',
+     'get_auto_step_size'),
+    ('_sag_fast', 'sklearn.linear_model.sag_fast', 'sklearn.linear_model',
+     'MultinomialLogLoss64'),
+    ('_sgd_fast', 'sklearn.linear_model.sgd_fast', 'sklearn.linear_model',
+     'Hinge'),
+    ('_stochastic_gradient', 'sklearn.linear_model.stochastic_gradient',
+     'sklearn.linear_model', 'SGDClassifier'),
+    ('_theil_sen', 'sklearn.linear_model.theil_sen', 'sklearn.linear_model',
+     'TheilSenRegressor'),
+
+    ('_bicluster', 'sklearn.metrics.cluster.bicluster',
+     'sklearn.metrics.cluster', 'consensus_score'),
+    ('_supervised', 'sklearn.metrics.cluster.supervised',
+     'sklearn.metrics.cluster', 'entropy'),
+    ('_unsupervised', 'sklearn.metrics.cluster.unsupervised',
+     'sklearn.metrics.cluster', 'silhouette_score'),
+    ('_expected_mutual_info_fast',
+     'sklearn.metrics.cluster.expected_mutual_info_fast',
+     'sklearn.metrics.cluster', 'expected_mutual_information'),
+
+    ('_base', 'sklearn.metrics.base', 'sklearn.metrics', 'combinations'),
+    ('_classification', 'sklearn.metrics.classification', 'sklearn.metrics',
+     'accuracy_score'),
+    ('_regression', 'sklearn.metrics.regression', 'sklearn.metrics',
+     'max_error'),
+    ('_ranking', 'sklearn.metrics.ranking', 'sklearn.metrics', 'roc_curve'),
+    ('_pairwise_fast', 'sklearn.metrics.pairwise_fast', 'sklearn.metrics',
+     'np'),
+    ('_scorer', 'sklearn.metrics.scorer', 'sklearn.metrics', 'get_scorer'),
+
     ('_partial_dependence', 'sklearn.inspection.partial_dependence',
      'sklearn.inspection', 'partial_dependence'),
     ('_permutation_importance', 'sklearn.inspection.permutation_importance',

sklearn/base.py

Lines changed: 1 addition & 1 deletion
@@ -404,7 +404,7 @@ def score(self, X, y, sample_weight=None):
         """

         from .metrics import r2_score
-        from .metrics.regression import _check_reg_targets
+        from .metrics._regression import _check_reg_targets
         y_pred = self.predict(X)
         # XXX: Remove the check in 0.23
         y_type, _, _, _ = _check_reg_targets(y, y_pred, None)

sklearn/covariance/_graph_lasso_.py

Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@

 from ..exceptions import ConvergenceWarning
 from ..utils.validation import check_random_state, check_array
-from ..linear_model import cd_fast
+from ..linear_model import _cd_fast as cd_fast
 from ..linear_model import lars_path_gram
 from ..model_selection import check_cv, cross_val_score
sklearn/discriminant_analysis.py

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@
 from scipy.special import expit

 from .base import BaseEstimator, TransformerMixin, ClassifierMixin
-from .linear_model.base import LinearClassifierMixin
+from .linear_model._base import LinearClassifierMixin
 from .covariance import ledoit_wolf, empirical_covariance, shrunk_covariance
 from .utils.multiclass import unique_labels
 from .utils import check_array, check_X_y

sklearn/ensemble/_base.py

Lines changed: 11 additions & 2 deletions
@@ -7,6 +7,7 @@

 from abc import ABCMeta, abstractmethod
 import numbers
+import warnings

 import numpy as np

@@ -223,6 +224,15 @@ def _validate_estimators(self):
         # defined by MetaEstimatorMixin
         self._validate_names(names)

+        # FIXME: deprecate the usage of None to drop an estimator from the
+        # ensemble. Remove in 0.24
+        if any(est is None for est in estimators):
+            warnings.warn(
+                "Using 'None' to drop an estimator from the ensemble is "
+                "deprecated in 0.22 and support will be dropped in 0.24. "
+                "Use the string 'drop' instead.", DeprecationWarning
+            )
+
         has_estimator = any(est not in (None, 'drop') for est in estimators)
         if not has_estimator:
             raise ValueError(
@@ -236,8 +246,7 @@ def _validate_estimators(self):
         for est in estimators:
             if est not in (None, 'drop') and not is_estimator_type(est):
                 raise ValueError(
-                    "The estimator {} should be a {}."
-                    .format(
+                    "The estimator {} should be a {}.".format(
                         est.__class__.__name__, is_estimator_type.__name__[3:]
                     )
                 )
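A hedged sketch of how this new check would surface to users, assuming the released 0.22 behavior matches the hunk (the estimators and data below are arbitrary):

import warnings

from sklearn.datasets import make_classification
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression

X, y = make_classification(random_state=0)
clf = VotingClassifier(estimators=[('lr', LogisticRegression(max_iter=1000)),
                                   ('dropped', None)])  # None is deprecated

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    clf.fit(X, y)

print([str(w.message) for w in caught
       if issubclass(w.category, DeprecationWarning)])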

sklearn/ensemble/_voting.py

Lines changed: 15 additions & 5 deletions
@@ -88,9 +88,13 @@ class VotingClassifier(ClassifierMixin, _BaseVoting):
     estimators : list of (string, estimator) tuples
         Invoking the ``fit`` method on the ``VotingClassifier`` will fit clones
         of those original estimators that will be stored in the class attribute
-        ``self.estimators_``. An estimator can be set to ``None`` or ``'drop'``
+        ``self.estimators_``. An estimator can be set to ``'drop'``
         using ``set_params``.

+        .. deprecated:: 0.22
+           Using ``None`` to drop an estimator is deprecated in 0.22 and
+           support will be dropped in 0.24. Use the string ``'drop'`` instead.
+
     voting : str, {'hard', 'soft'} (default='hard')
         If 'hard', uses predicted class labels for majority rule voting.
         Else if 'soft', predicts the class label based on the argmax of
@@ -119,7 +123,7 @@ class VotingClassifier(ClassifierMixin, _BaseVoting):
     ----------
     estimators_ : list of classifiers
         The collection of fitted sub-estimators as defined in ``estimators``
-        that are not `None`.
+        that are not 'drop'.

     named_estimators_ : Bunch object, a dictionary with attribute access
         Attribute to access any fitted sub-estimators by name.
@@ -322,8 +326,12 @@ class VotingRegressor(RegressorMixin, _BaseVoting):
     estimators : list of (string, estimator) tuples
         Invoking the ``fit`` method on the ``VotingRegressor`` will fit clones
         of those original estimators that will be stored in the class attribute
-        ``self.estimators_``. An estimator can be set to ``None`` or ``'drop'``
-        using ``set_params``.
+        ``self.estimators_``. An estimator can be set to ``'drop'`` using
+        ``set_params``.
+
+        .. deprecated:: 0.22
+           Using ``None`` to drop an estimator is deprecated in 0.22 and
+           support will be dropped in 0.24. Use the string ``'drop'`` instead.

     weights : array-like, shape (n_regressors,), optional (default=`None`)
         Sequence of weights (`float` or `int`) to weight the occurrences of
@@ -339,11 +347,13 @@ class VotingRegressor(RegressorMixin, _BaseVoting):
     ----------
     estimators_ : list of regressors
         The collection of fitted sub-estimators as defined in ``estimators``
-        that are not `None`.
+        that are not 'drop'.

     named_estimators_ : Bunch object, a dictionary with attribute access
         Attribute to access any fitted sub-estimators by name.

+        .. versionadded:: 0.20
+
     Examples
     --------
     >>> import numpy as np