merge master · jeremiedbb/scikit-learn@65060e0 · GitHub
[go: up one dir, main page]

Skip to content

Commit 65060e0

Browse files
committed
merge master
2 parents 4ad3d74 + bcd399f commit 65060e0

File tree

149 files changed

+668
-447
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

149 files changed

+668
-447
lines changed

.gitignore

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ _configtest.o.d
7575
# files generated from a template
7676
sklearn/utils/_seq_dataset.pyx
7777
sklearn/utils/_seq_dataset.pxd
78-
sklearn/linear_model/sag_fast.pyx
78+
sklearn/linear_model/_sag_fast.pyx
7979

8080
# deprecated paths
8181
# TODO: Remove in 0.24
@@ -129,3 +129,62 @@ sklearn/svm/bounds.py
129129
sklearn/svm/libsvm.py
130130
sklearn/svm/libsvm_sparse.py
131131
sklearn/svm/liblinear.py
132+
133+
sklearn/linear_model/base.py
134+
sklearn/linear_model/bayes.py
135+
sklearn/linear_model/cd_fast.py
136+
sklearn/linear_model/coordinate_descent.py
137+
sklearn/linear_model/huber.py
138+
sklearn/linear_model/least_angle.py
139+
sklearn/linear_model/logistic.py
140+
sklearn/linear_model/omp.py
141+
sklearn/linear_model/passive_aggressive.py
142+
sklearn/linear_model/perceptron.py
143+
sklearn/linear_model/ransac.py
144+
sklearn/linear_model/ridge.py
145+
sklearn/linear_model/sag.py
146+
sklearn/linear_model/sag_fast.py
147+
sklearn/linear_model/sgd_fast.py
148+
sklearn/linear_model/stochastic_gradient.py
149+
sklearn/linear_model/theil_sen.py
150+
151+
sklearn/metrics/cluster/bicluster.py
152+
sklearn/metrics/cluster/supervised.py
153+
sklearn/metrics/cluster/unsupervised.py
154+
sklearn/metrics/cluster/expected_mutual_info_fast.py
155+
156+
sklearn/metrics/base.py
157+
sklearn/metrics/classification.py
158+
sklearn/metrics/regression.py
159+
sklearn/metrics/ranking.py
160+
sklearn/metrics/pairwise_fast.py
161+
sklearn/metrics/scorer.py
162+
163+
sklearn/inspection/partial_dependence.py
164+
sklearn/inspection/permutation_importance.py
165+
166+
sklearn/neighbors/ball_tree.py
167+
sklearn/neighbors/base.py
168+
sklearn/neighbors/classification.py
169+
sklearn/neighbors/dist_metrics.py
170+
sklearn/neighbors/graph.py
171+
sklearn/neighbors/kd_tree.py
172+
sklearn/neighbors/kde.py
173+
sklearn/neighbors/lof.py
174+
sklearn/neighbors/nca.py
175+
sklearn/neighbors/nearest_centroid.py
176+
sklearn/neighbors/quad_tree.py
177+
sklearn/neighbors/regression.py
178+
sklearn/neighbors/typedefs.py
179+
sklearn/neighbors/unsupervised.py
180+
181+
sklearn/manifold/isomap.py
182+
sklearn/manifold/locally_linear.py
183+
sklearn/manifold/mds.py
184+
sklearn/manifold/spectral_embedding_.py
185+
sklearn/manifold/t_sne.py
186+
187+
sklearn/semi_supervised/label_propagation.py
188+
189+
sklearn/preprocessing/data.py
190+
sklearn/preprocessing/label.py

benchmarks/bench_rcv1_logreg_convergence.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
from sklearn.linear_model import (LogisticRegression, SGDClassifier)
1313
from sklearn.datasets import fetch_rcv1
14-
from sklearn.linear_model.sag import get_auto_step_size
14+
from sklearn.linear_model._sag import get_auto_step_size
1515

1616
try:
1717
import lightning.classification as lightning_clf

benchmarks/bench_sparsify.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545

4646
from scipy.sparse.csr import csr_matrix
4747
import numpy as np
48-
from sklearn.linear_model.stochastic_gradient import SGDRegressor
48+
from sklearn.linear_model import SGDRegressor
4949
from sklearn.metrics import r2_score
5050

5151
np.random.seed(42)

build_tools/azure/install.cmd

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,15 @@ IF "%PYTHON_ARCH%"=="64" (
1111
call deactivate
1212
@rem Clean up any left-over from a previous build
1313
conda remove --all -q -y -n %VIRTUALENV%
14-
conda create -n %VIRTUALENV% -q -y python=%PYTHON_VERSION% numpy scipy cython matplotlib pytest=%PYTEST_VERSION% wheel pillow joblib
14+
conda create -n %VIRTUALENV% -q -y python=%PYTHON_VERSION% numpy scipy cython matplotlib wheel pillow joblib
1515

1616
call activate %VIRTUALENV%
17+
18+
IF "%PYTEST_VERSION%"=="*" (
19+
pip install pytest
20+
) else (
21+
pip install pytest==%PYTEST_VERSION%
22+
)
1723
pip install pytest-xdist
1824
) else (
1925
pip install numpy scipy cython pytest wheel pillow joblib

build_tools/azure/install.sh

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ version_ge() {
2020

2121
if [[ "$DISTRIB" == "conda" ]]; then
2222

23-
TO_INSTALL="python=$PYTHON_VERSION pip pytest=$PYTEST_VERSION \
24-
pytest-cov numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION \
23+
TO_INSTALL="python=$PYTHON_VERSION pip \
24+
numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION \
2525
cython=$CYTHON_VERSION joblib=$JOBLIB_VERSION"
2626

2727
if [[ "$INSTALL_MKL" == "true" ]]; then
@@ -60,6 +60,12 @@ if [[ "$DISTRIB" == "conda" ]]; then
6060
conda config --set restore_free_channel true
6161
fi
6262

63+
if [[ "$PYTEST_VERSION" == "*" ]]; then
64+
pip install pytest
65+
else
66+
pip install pytest=="$PYTEST_VERSION"
67+
fi
68+
6369
make_conda $TO_INSTALL
6470
if [[ "$PYTHON_VERSION" == "*" ]]; then
6571
pip install pytest-xdist
@@ -88,7 +94,7 @@ elif [[ "$DISTRIB" == "conda-pip-latest" ]]; then
8894
fi
8995

9096
if [[ "$COVERAGE" == "true" ]]; then
91-
python -m pip install coverage codecov
97+
python -m pip install coverage codecov pytest-cov
9298
fi
9399

94100
if [[ "$TEST_DOCSTRINGS" == "true" ]]; then

build_tools/azure/posix-32.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ jobs:
1111
TEST_DIR: '$(Agent.WorkFolder)/tmp_folder'
1212
JUNITXML: 'test-data.xml'
1313
OMP_NUM_THREADS: '4'
14-
PYTEST_VERSION: '3.8.1'
14+
PYTEST_VERSION: '5.2.1'
1515
OPENBLAS_NUM_THREADS: '4'
1616
SKLEARN_SKIP_NETWORK_TESTS: '1'
1717
strategy:

build_tools/azure/posix.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ jobs:
1111
TEST_DIR: '$(Agent.WorkFolder)/tmp_folder'
1212
VIRTUALENV: 'testvenv'
1313
JUNITXML: 'test-data.xml'
14-
PYTEST_VERSION: '3.8.1'
14+
PYTEST_VERSION: '5.2.1'
1515
OMP_NUM_THREADS: '4'
1616
OPENBLAS_NUM_THREADS: '4'
1717
SKLEARN_SKIP_NETWORK_TESTS: '1'

build_tools/azure/windows.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ jobs:
1212
VIRTUALENV: 'testvenv'
1313
JUNITXML: 'test-data.xml'
1414
SKLEARN_SKIP_NETWORK_TESTS: '1'
15-
PYTEST_VERSION: '3.8.1'
15+
PYTEST_VERSION: '5.2.1'
1616
TMP_FOLDER: '$(Agent.WorkFolder)\tmp_folder'
1717
strategy:
1818
matrix:

doc/developers/develop.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -453,7 +453,7 @@ this can be achieved with::
453453
return self.classes_[np.argmax(D, axis=1)]
454454

455455
In linear models, coefficients are stored in an array called ``coef_``, and the
456-
independent term is stored in ``intercept_``. ``sklearn.linear_model.base``
456+
independent term is stored in ``intercept_``. ``sklearn.linear_model._base``
457457
contains a few base classes and mixins that implement common linear model
458458
patterns.
459459

doc/modules/classes.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -882,7 +882,7 @@ Miscellaneous
882882
manifold.locally_linear_embedding
883883
manifold.smacof
884884
manifold.spectral_embedding
885-
manifold.t_sne.trustworthiness
885+
manifold.trustworthiness
886886

887887

888888
.. _metrics_ref:

doc/modules/computing.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,7 @@ decision function that is applied at prediction time is the same (a dot product)
297297
, so latency should be equivalent.
298298

299299
Here is an example using
300-
:class:`sklearn.linear_model.stochastic_gradient.SGDClassifier` with the
300+
:class:`sklearn.linear_model.SGDClassifier` with the
301301
``elasticnet`` penalty. The regularization strength is globally controlled by
302302
the ``alpha`` parameter. With a sufficiently high ``alpha``,
303303
one can then increase the ``l1_ratio`` parameter of ``elasticnet`` to

doc/modules/cross_validation.rst

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ predefined scorer names::
241241

242242
Or as a dict mapping scorer name to a predefined or custom scoring function::
243243

244-
>>> from sklearn.metrics.scorer import make_scorer
244+
>>> from sklearn.metrics import make_scorer
245245
>>> scoring = {'prec_macro': 'precision_macro',
246246
... 'rec_macro': make_scorer(recall_score, average='macro')}
247247
>>> scores = cross_validate(clf, X, y, scoring=scoring,
@@ -534,19 +534,30 @@ Stratified k-fold
534534
folds: each set contains approximately the same percentage of samples of each
535535
target class as the complete set.
536536

537-
Example of stratified 3-fold cross-validation on a dataset with 10 samples from
538-
two slightly unbalanced classes::
537+
Here is an example of stratified 3-fold cross-validation on a dataset with 50 samples from
538+
two unbalanced classes. We show the number of samples in each class and compare with
539+
:class:`KFold`.
539540

540-
>>> from sklearn.model_selection import StratifiedKFold
541-
542-
>>> X = np.ones(10)
543-
>>> y = [0, 0, 0, 0, 1, 1, 1, 1, 1, 1]
544-
>>> skf = StratifiedKFold(n_splits=3)
545-
>>> for train, test in skf.split(X, y):
546-
... print("%s %s" % (train, test))
547-
[2 3 6 7 8 9] [0 1 4 5]
548-
[0 1 3 4 5 8 9] [2 6 7]
549-
[0 1 2 4 5 6 7] [3 8 9]
541+
>>> from sklearn.model_selection import StratifiedKFold, KFold
542+
>>> import numpy as np
543+
>>> X, y = np.ones((50, 1)), np.hstack(([0] * 45, [1] * 5))
544+
>>> skf = StratifiedKFold(n_splits=3)
545+
>>> for train, test in skf.split(X, y):
546+
... print('train - {} | test - {}'.format(
547+
... np.bincount(y[train]), np.bincount(y[test])))
548+
train - [30 3] | test - [15 2]
549+
train - [30 3] | test - [15 2]
550+
train - [30 4] | test - [15 1]
551+
>>> kf = KFold(n_splits=3)
552+
>>> for train, test in kf.split(X, y):
553+
... print('train - {} | test - {}'.format(
554+
... np.bincount(y[train]), np.bincount(y[test])))
555+
train - [28 5] | test - [17]
556+
train - [28 5] | test - [17]
557+
train - [34] | test - [11 5]
558+
559+
We can see that :class:`StratifiedKFold` preserves the class ratios
560+
(approximately 1 / 10) in both the train and test datasets.
550561

551562
Here is a visualization of the cross-validation behavior.
552563

doc/modules/decomposition.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ that learns :math:`n` components in its ``fit`` method, and can be used on new
2424
data to project it on these components.
2525

2626
PCA centers but does not scale the input data for each feature before
27-
applying the SVD. The optional parameter parameter ``whiten=True`` makes it
27+
applying the SVD. The optional parameter ``whiten=True`` makes it
2828
possible to project the data onto the singular space while scaling each
2929
component to unit variance. This is often useful if the models down-stream make
3030
strong assumptions on the isotropy of the signal: this is for example the case

doc/modules/density.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ It's clear how the kernel shape affects the smoothness of the resulting
7878
distribution. The scikit-learn kernel density estimator can be used as
7979
follows:
8080

81-
>>> from sklearn.neighbors.kde import KernelDensity
81+
>>> from sklearn.neighbors import KernelDensity
8282
>>> import numpy as np
8383
>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
8484
>>> kde = KernelDensity(kernel='gaussian', bandwidth=0.2).fit(X)

doc/modules/neighbors.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -469,7 +469,7 @@ and Quadratic Discriminant Analysis (:class:`sklearn.discriminant_analysis.Quadr
469469
for more complex methods that do not make this assumption. Usage of the default
470470
:class:`NearestCentroid` is simple:
471471

472-
>>> from sklearn.neighbors.nearest_centroid import NearestCentroid
472+
>>> from sklearn.neighbors import NearestCentroid
473473
>>> import numpy as np
474474
>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
475475
>>> y = np.array([1, 1, 1, 2, 2, 2])

doc/templates/index.html

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -182,11 +182,6 @@ <h4 class="sk-landing-call-header">Community</h4>
182182
<li><strong>Gitter:</strong> <a href="https://gitter.im/scikit-learn/scikit-learn">gitter.im/scikit-learn</a></li>
183183
</ul>
184184

185-
<form target="_top" id="paypal-form" method="post" action="https://www.paypal.com/cgi-bin/webscr">
186-
<input type="hidden" value="_s-xclick" name="cmd">
187-
<input type="hidden" value="74EYUMF3FTSW8" name="hosted_button_id">
188-
</form>
189-
<a class="btn btn-warning btn-big sk-donate-btn mb-1" onclick="document.getElementById('paypal-form').submit(); ">Help us, <strong>donate!</strong></a>
190185
<a class="btn btn-warning btn-big mb-1" href="about.html#citing-scikit-learn"><strong>Cite us!</strong></a>
191186
</div>
192187
<div class="col-md-4">

doc/themes/scikit-learn-modern/static/css/theme.css

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,13 @@ div.sk-navbar-collapse {
491491
text-align: center;
492492
}
493493

494+
dl.citation > dd > ol > li {
495+
display: inline;
496+
}
497+
498+
dl.citation > dd > ol {
499+
margin-bottom: 0;
500+
}
494501

495502
/* docs index */
496503

doc/themes/scikit-learn/static/nature.css_t

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -967,11 +967,6 @@ div.container.index-lower ul li em {
967967
font-weight: bold;
968968
}
969969

970-
#paypal-form {
971-
margin: 30px 0;
972-
padding: 0;
973-
}
974-
975970
div.container.index-lower a.cite-us {
976971
margin-left: 60px;
977972
padding-right: 20px;

doc/whats_new/v0.21.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,7 @@ random sampling procedures.
228228
- :func:`svm.SVC.decision_function` and
229229
:func:`multiclass.OneVsOneClassifier.decision_function`. |Fix|
230230
- :class:`linear_model.SGDClassifier` and any derived classifiers. |Fix|
231-
- Any model using the :func:`linear_model.sag.sag_solver` function with a `0`
231+
- Any model using the :func:`linear_model._sag.sag_solver` function with a `0`
232232
seed, including :class:`linear_model.LogisticRegression`,
233233
:class:`linear_model.LogisticRegressionCV`, :class:`linear_model.Ridge`,
234234
and :class:`linear_model.RidgeCV` with 'sag' solver. |Fix|

doc/whats_new/v0.22.rst

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -460,6 +460,10 @@ Changelog
460460
:func:`metrics.pairwise.manhattan_distances` in the case of sparse matrices.
461461
:pr:`15049` by `Paolo Toccaceli <ptocca>`.
462462

463+
- |Enhancement| :func:`metrics.median_absolute_error` now supports
464+
the ``multioutput`` parameter.
465+
:pr:`14732` by :user:`Agamemnon Krasoulis <agamemnonc>`.
466+
463467
:mod:`sklearn.model_selection`
464468
..............................
465469

@@ -663,7 +667,7 @@ Changelog
663667
- |Fix| :func:`utils.check_array` will now correctly detect numeric dtypes in
664668
pandas dataframes, fixing a bug where ``float32`` was upcast to ``float64``
665669
unnecessarily. :pr:`15094` by `Andreas Müller`_.
666-
670+
667671
- |API| The following utils have been deprecated and are now private:
668672
- ``choose_check_classifiers_labels``
669673
- ``enforce_estimator_tags_y``
@@ -719,4 +723,3 @@ These changes mostly affect library developers.
719723
:pr:`13392` by :user:`Rok Mihevc <rok>`.
720724

721725
- |Fix| Added ``check_transformer_data_not_an_array`` to checks where missing
722-

examples/applications/plot_model_complexity_influence.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
from sklearn.metrics import mean_squared_error
3232
from sklearn.svm import NuSVR
3333
from sklearn.ensemble import GradientBoostingRegressor
34-
from sklearn.linear_model.stochastic_gradient import SGDClassifier
34+
from sklearn.linear_model import SGDClassifier
3535
from sklearn.metrics import hamming_loss
3636

3737
# #############################################################################

examples/linear_model/plot_sgd_penalties.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
Contours of where the penalty is equal to 1
77
for the three penalties L1, L2 and elastic-net.
88
9-
All of the above are supported by
10-
:class:`sklearn.linear_model.stochastic_gradient`.
9+
All of the above are supported by :class:`~sklearn.linear_model.SGDClassifier`
10+
and :class:`~sklearn.linear_model.SGDRegressor`.
1111
1212
"""
1313
print(__doc__)

0 commit comments

Comments
 (0)
0