10000 Merge remote-tracking branch 'upstream/master' into cat_hgbt_rb · scikit-learn/scikit-learn@d0557a5 · GitHub
[go: up one dir, main page]

Skip to content

Commit d0557a5

Browse files
committed
Merge remote-tracking branch 'upstream/master' into cat_hgbt_rb
2 parents ae9be56 + a0c76ce commit d0557a5

28 files changed

+1037
-102
lines changed

doc/conf.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import warnings
1818
import re
1919
from packaging.version import parse
20+
from pathlib import Path
2021

2122
# If extensions (or modules to document with autodoc) are in another
2223
# directory, add these directories to sys.path here. If the directory
@@ -208,6 +209,23 @@
208209
# If true, the reST sources are included in the HTML build as _sources/name.
209210
html_copy_source = True
210211

212+
# Adds variables into templates
213+
html_context = {}
214+
# finds latest release highlights and places it into HTML context for
215+
# index.html
216+
release_highlights_dir = Path("..") / "examples" / "release_highlights"
217+
# Finds the highlight with the latest version number
218+
latest_highlights = sorted(release_highlights_dir.glob(
219+
"plot_release_highlights_*.py"))[-1]
220+
latest_highlights = latest_highlights.with_suffix('').name
221+
html_context["release_highlights"] = \
222+
f"auto_examples/release_highlights/{latest_highlights}"
223+
224+
# get version from highlight name assuming highlights have the form
225+
# plot_release_highlights_0_22_0
226+
highlight_version = ".".join(latest_highlights.split("_")[-3:-1])
227+
html_context["release_highlights_version"] = highlight_version
228+
211229
# -- Options for LaTeX output ------------------------------------------------
212230
latex_elements = {
213231
# The paper size ('letterpaper' or 'a4paper').

doc/modules/classes.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1569,6 +1569,7 @@ Plotting
15691569
utils.deprecated
15701570
utils.estimator_checks.check_estimator
15711571
utils.estimator_checks.parametrize_with_checks
1572+
utils.estimator_html_repr
15721573
utils.extmath.safe_sparse_dot
15731574
utils.extmath.randomized_range_finder
15741575
utils.extmath.randomized_svd

doc/modules/compose.rst

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -528,6 +528,31 @@ above example would be::
528528
('countvectorizer', CountVectorizer(),
529529
'title')])
530530

531+
.. _visualizing_composite_estimators:
532+
533+
Visualizing Composite Estimators
534+
================================
535+
536+
Estimators can be displayed with an HTML representation when shown in a
537+
jupyter notebook. This can be useful to diagnose or visualize a Pipeline with
538+
many estimators. This visualization is activated by setting the
539+
`display` option in :func:`sklearn.set_config`::
540+
541+
>>> from sklearn import set_config
542+
>>> set_config(display='diagram') # doctest: +SKIP
543+
>>> # displays HTML representation in a jupyter context
544+
>>> column_trans # doctest: +SKIP
545+
546+
An example of the HTML output can be seen in the
547+
**HTML representation of Pipeline** section of
548+
:ref:`sphx_glr_auto_examples_compose_plot_column_transformer_mixed_types.py`.
549+
As an alternative, the HTML can be written to a file using
550+
:func:`~sklearn.utils.estimator_html_repr`::
551+
552+
>>> from sklearn.utils import estimator_html_repr
553+
>>> with open('my_estimator.html', 'w') as f: # doctest: +SKIP
554+
... f.write(estimator_html_repr(clf))
555+
531556
.. topic:: Examples:
532557

533558
* :ref:`sphx_glr_auto_examples_compose_plot_column_transformer.py`

doc/modules/ensemble.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1018,6 +1018,8 @@ If no missing values were encountered for a given feature during training,
10181018
then samples with missing values are mapped to whichever child has the most
10191019
samples.
10201020

1021+
.. _sw_hgbdt:
1022+
10211023
Sample weight support
10221024
---------------------
10231025

doc/modules/svm.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ After being fitted, the model can then be used to predict new values::
9090
SVMs decision function (detailed in the :ref:`svm_mathematical_formulation`)
9191
depends on some subset of the training data, called the support vectors. Some
9292
properties of these support vectors can be found in attributes
93-
``support_vectors_``, ``support_`` and ``n_support``::
93+
``support_vectors_``, ``support_`` and ``n_support_``::
9494

9595
>>> # get support vectors
9696
>>> clf.support_vectors_

doc/templates/index.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
<h1 class="sk-landing-header text-white text-monospace">scikit-learn</h1>
99
<h4 class="sk-landing-subheader text-white font-italic mb-3">Machine Learning in Python</h4>
1010
<a class="btn sk-landing-btn mb-1" href="{{ pathto('getting_started') }}" role="button">Getting Started</a>
11-
<a class="btn sk-landing-btn mb-1" href="whats_new/v{{ version }}.html" role="button">What's New in {{ release }}</a>
11+
<a class="btn sk-landing-btn mb-1" href="{{ pathto(release_highlights) }}" role="button">Release Highlights for {{ release_highlights_version }}</a>
1212
<a class="btn sk-landing-btn mb-1" href="https://github.com/scikit-learn/scikit-learn" role="button">GitHub</a>
1313
</div>
1414
<div class="col-md-6 d-flex">
@@ -160,7 +160,7 @@ <h4 class="sk-landing-call-header">News</h4>
160160
</li>
161161
<li><strong>March 2020.</strong> scikit-learn 0.22.2 is available for download (<a href="whats_new/v0.22.html#version-0-22-2">Changelog</a>).
162162
<li><strong>January 2020.</strong> scikit-learn 0.22.1 is available for download (<a href="whats_new/v0.22.html#version-0-22-1">Changelog</a>).
163-
<li><strong>December 2019.</strong> scikit-learn 0.22 is available for download (<a href="whats_new/v0.22.html#version-0-22-0">Changelog</a>).
163+
<li><strong>December 2019.</strong> scikit-learn 0.22 is available for download (<a href="whats_new/v0.22.html#version-0-22-0">Changelog</a> and <a href="{{ pathto('auto_examples/release_highlights/plot_release_highlights_0_22_0') }}">Release Highlights</a>).
164164
</li>
165165
<li><strong>Scikit-learn from 0.21 requires Python 3.5 or greater.</strong>
166166
</li>

doc/themes/scikit-learn-modern/nav.html

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
{%- set drop_down_navigation = [
1010
('Getting Started', pathto('getting_started')),
1111
('Tutorial', pathto('tutorial/index')),
12+
("What's new", 'whats_new/v' + version + '.html'),
1213
('Glossary', pathto('glossary')),
1314
('Development', pathto('developers/index')),
1415
('FAQ', pathto('faq')),

doc/whats_new/v0.23.rst

Lines changed: 43 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@ Version 0.23.0
99

1010
**In Development**
1111

12+
For a short description of the main highlights of the release, please
13+
refer to
14+
:ref:`sphx_glr_auto_examples_release_highlights_plot_release_highlights_0_23_0.py`.
15+
1216

1317
.. include:: changelog_legend.inc
1418

@@ -103,9 +107,9 @@ Changelog
103107
:pr:`16149` by :user:`Jeremie du Boisberranger <jeremiedbb>` and
104108
:user:`Alex Shacked <alexshacked>`.
105109

106-
- |Efficiency| The critical parts of :class:`cluster.KMeans` have a more
107-
optimized implementation. Parallelism is now over the data instead of over
108-
initializations allowing better scalability. :pr:`11950` by
110+
- |Efficiency| |MajorFeature| The critical parts of :class:`cluster.KMeans`
111+
have a more optimized implementation. Parallelism is now over the data
112+
instead of over initializations allowing better scalability. :pr:`11950` by
109113
:user:`Jeremie du Boisberranger <jeremiedbb>`.
110114

111115
- |Enhancement| :class:`cluster.KMeans` now supports sparse data when
@@ -124,6 +128,10 @@ Changelog
124128
could not have a `np.int64` type. :pr:`16484`
125129
by :user:`Jeremie du Boisberranger <jeremiedbb>`.
126130

131+
- |Fix| :class:`cluster.AgglomerativeClustering` add specific error when
132+
distance matrix is not square and `affinity=precomputed`.
133+
:pr:`16257` by :user:`Simona Maggio <simonamaggio>`.
134+
127135
- |API| The ``n_jobs`` parameter of :class:`cluster.KMeans`,
128136
:class:`cluster.SpectralCoclustering` and
129137
:class:`cluster.SpectralBiclustering` is deprecated. They now use OpenMP
@@ -187,6 +195,12 @@ Changelog
187195
`ValueError` for arguments `n_classes < 1` OR `length < 1`.
188196
:pr:`16006` by :user:`Rushabh Vasani <rushabh-v>`.
189197

198+
- |API| The `StreamHandler` was removed from `sklearn.logger` to avoid
199+
double logging of messages in common cases where a handler is attached
200+
to the root logger, and to follow the Python logging documentation
201+
recommendation for libraries to leave the log message handling to
202+
users and application code. :pr:`16451` by :user:`Christoph Deil <cdeil>`.
203+
190204
:mod:`sklearn.decomposition`
191205
............................
192206

@@ -234,7 +248,7 @@ Changelog
234248
samples in the training set. :pr:`14516` by :user:`Johann Faouzi
235249
<johannfaouzi>`.
236250

237-
- |Feature| :class:`ensemble.HistGradientBoostingClassifier` and
251+
- |MajorFeature| :class:`ensemble.HistGradientBoostingClassifier` and
238252
:class:`ensemble.HistGradientBoostingRegressor` now support monotonic
239253
constraints, useful when features are supposed to have a positive/negative
240254
effect on the target. :pr:`15582` by `Nicolas Hug`_.
@@ -340,9 +354,10 @@ Changelog
340354
:pr:`14300` by :user:`Christian Lorentzen <lorentzenchr>`, `Roman Yurchak`_,
341355
and `Olivier Grisel`_.
342356

343-
- |Feature| Support of `sample_weight` in :class:`linear_model.ElasticNet` and
344-
:class:`linear_model.Lasso` for dense feature matrix `X`.
345-
:pr:`15436` by :user:`Christian Lorentzen <lorentzenchr>`.
357+
- |MajorFeature| Support of `sample_weight` in
358+
:class:`linear_model.ElasticNet` and :class:`linear_model.Lasso` for dense
359+
feature matrix `X`. :pr:`15436` by :user:`Christian Lorentzen
360+
<lorentzenchr>`.
346361

347362
- |Efficiency| :class:`linear_model.RidgeCV` and
348363
:class:`linear_model.RidgeClassifierCV` now does not allocate a
@@ -395,6 +410,13 @@ Changelog
395410
using joblib loky backend. :pr:`14264` by
396411
:user:`Jérémie du Boisberranger <jeremiedbb>`.
397412

413+
- |Efficiency| Speed up :class:`linear_model.MultiTaskLasso`,
414+
:class:`linear_model.MultiTaskLassoCV`, :class:`linear_model.MultiTaskElasticNet`,
415+
:class:`linear_model.MultiTaskElasticNetCV` by avoiding slower
416+
BLAS Level 2 calls on small arrays
417+
:pr:`17021` by :user:`Alex Gramfort <agramfort>` and
418+
:user:`Mathurin Massias <mathurinm>`.
419+
398420
:mod:`sklearn.metrics`
399421
......................
400422

@@ -436,9 +458,9 @@ Changelog
436458
type and details.
437459
:pr:`15622` by :user:`Gregory Morse <GregoryMorse>`.
438460

439-
- |Fix| :func: `cross_val_predict` supports `method="predict_proba"`
440-
when `y=None`.
441-
:pr:`15918` by :user:`Luca Kubin <lkubin>`.
461+
- |Fix| :func:`model_selection.cross_val_predict` supports
462+
`method="predict_proba"` when `y=None`. :pr:`15918` by
463+
:user:`Luca Kubin <lkubin>`.
442464

443465
- |Fix| :func:`model_selection.fit_grid_point` is deprecated in 0.23 and will
444466
be removed in 0.25. :pr:`16401` by
@@ -567,6 +589,12 @@ Changelog
567589
:mod:`sklearn.utils`
568590
....................
569591

592+
- |MajorFeature| Estimators can now be displayed with a rich html
593+
representation. This can be enabled in Jupyter notebooks by setting
594+
`display='diagram'` in :func:`~sklearn.set_config`. The raw html can be
595+
returned by using :func:`utils.estimator_html_repr`.
596+
:pr:`14180` by `Thomas Fan`_.
597+
570598
- |Enhancement| improve error message in :func:`utils.validation.column_or_1d`.
571599
:pr:`15926` by :user:`Loïc Estève <lesteve>`.
572600

@@ -595,16 +623,14 @@ Changelog
595623
- |FIX| :func:`utils.all_estimators` now only returns public estimators.
596624
:pr:`15380` by `Thomas Fan`_.
597625

598-
:mod:`sklearn.cluster`
599-
......................
600-
601-
- |Fix| :class:`cluster.AgglomerativeClustering` add specific error when
602-
distance matrix is not square and `affinity=precomputed`.
603-
:pr:`16257` by :user:`Simona Maggio <simonamaggio>`.
604-
605626
Miscellaneous
606627
.............
607628

629+
- |MajorFeature| Adds an HTML representation of estimators to be shown in
630+
a jupyter notebook or lab. This visualization is activated by setting the
631+
`display` option in :func:`sklearn.set_config`. :pr:`14180` by
632+
`Thomas Fan`_.
633+
608634
- |Enhancement| ``scikit-learn`` now works with ``mypy`` without errors.
609635
:pr:`16726` by `Roman Yurchak`_.
610636

examples/compose/plot_column_transformer_mixed_types.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,15 @@
8787
clf.fit(X_train, y_train)
8888
print("model score: %.3f" % clf.score(X_test, y_test))
8989

90+
##############################################################################
91+
# HTML representation of ``Pipeline``
92+
###############################################################################
93+
# When the ``Pipeline`` is printed out in a jupyter notebook an HTML
94+
# representation of the estimator is displayed as follows:
95+
from sklearn import set_config
96+
set_config(display='diagram')
97+
clf
98+
9099
###############################################################################
91100
# Use ``ColumnTransformer`` by selecting column by data types
92101
###############################################################################

examples/linear_model/plot_bayesian_ridge_curvefit.py

100755100644
File mode changed.

0 commit comments

Comments
 (0)
0