Merge remote-tracking branch 'upstream/master' into add_codeblock_copybutton · thoo/scikit-learn@b83f4a5 · GitHub

Commit b83f4a5

Merge remote-tracking branch 'upstream/master' into add_codeblock_copybutton
* upstream/master:
  FIX remove FutureWarning in _object_dtype_isnan and add test (scikit-learn#12567)
  DOC Add 's' to "correspond" in docs for Hamming Loss. (scikit-learn#12565)
  EXA Fix comment in plot-iris-logistic example (scikit-learn#12564)
  FIX stop words validation in text vectorizers with custom preprocessors / tokenizers (scikit-learn#12393)
  DOC Add skorch to related projects (scikit-learn#12561)
  MNT Don't change self.n_values in OneHotEncoder.fit (scikit-learn#12286)
  MNT Remove unused assert_true imports (scikit-learn#12560)
  TST autoreplace assert_true(...==...) with plain assert (scikit-learn#12547)
  DOC: add a testimonial from JP Morgan (scikit-learn#12555)
2 parents 78ba979 + 4e81949 commit b83f4a5

83 files changed, +744 -671 lines changed


doc/related_projects.rst

Lines changed: 3 additions & 0 deletions
@@ -150,6 +150,9 @@ and tasks.
 
 - `lasagne <https://github.com/Lasagne/Lasagne>`_ A lightweight library to
   build and train neural networks in Theano.
+
+- `skorch <https://github.com/dnouri/skorch>`_ A scikit-learn compatible
+  neural network library that wraps PyTorch.
 
 **Broad scope**
 
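The new entry describes skorch in one sentence; as a hedged illustration (not part of this commit), the sketch below shows how a PyTorch module can be wrapped so it behaves like a scikit-learn classifier. The network architecture, synthetic data, and hyperparameters are made up for the example; only the NeuralNetClassifier wrapper and its fit/predict interface come from skorch.

    import numpy as np
    import torch.nn as nn
    from skorch import NeuralNetClassifier

    class MLP(nn.Module):
        """Toy two-layer network; the architecture is illustrative only."""
        def __init__(self, n_features=20, n_classes=2):
            super().__init__()
            self.net = nn.Sequential(
                nn.Linear(n_features, 32),
                nn.ReLU(),
                nn.Linear(32, n_classes),
            )

        def forward(self, X):
            return self.net(X)

    # Synthetic data, with dtypes chosen to match what PyTorch expects.
    X = np.random.rand(100, 20).astype(np.float32)
    y = np.random.randint(0, 2, size=100).astype(np.int64)

    # The wrapped estimator exposes the usual fit/predict API, so it can be
    # dropped into scikit-learn pipelines and grid searches.
    clf = NeuralNetClassifier(MLP, criterion=nn.CrossEntropyLoss,
                              max_epochs=5, lr=0.1, verbose=0)
    clf.fit(X, y)
    print(clf.predict(X[:5]))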

doc/testimonials/images/jpmorgan.png

Binary file added: 8.16 KB

doc/testimonials/testimonials.rst

Lines changed: 33 additions & 0 deletions
@@ -11,6 +11,39 @@ Who is using scikit-learn?
 
 .. to add a testimonials, just XXX
 
+`J.P.Morgan <https://www.jpmorgan.com>`_
+------------------------------------------
+
+.. raw:: html
+
+   <div class="logo">
+
+.. image:: images/jpmorgan.png
+   :width: 120pt
+   :target: https://www.jpmorgan.com
+
+.. raw:: html
+
+   </div>
+
+Scikit-learn is an indispensable part of the Python machine learning
+toolkit at JPMorgan. It is very widely used across all parts of the bank
+for classification, predictive analytics, and very many other machine
+learning tasks. Its straightforward API, its breadth of algorithms, and
+the quality of its documentation combine to make scikit-learn
+simultaneously very approachable and very powerful.
+
+.. raw:: html
+
+   <span class="testimonial-author">
+
+Stephen Simmons, VP, Athena Research, JPMorgan
+
+.. raw:: html
+
+   </span>
+
+
 `Spotify <https://www.spotify.com>`_
 ------------------------------------
 

doc/whats_new/v0.20.rst

Lines changed: 8 additions & 0 deletions
@@ -85,6 +85,14 @@ Changelog
   where ``max_features`` was sometimes rounded down to zero.
   :issue:`12388` by :user:`Connor Tann <Connossor>`.
 
+:mod:`sklearn.feature_extraction`
+...........................
+
+- |Fix| Fixed a regression in v0.20.0 where
+  :func:`feature_extraction.text.CountVectorizer` and other text vectorizers
+  could error during stop words validation with custom preprocessors
+  or tokenizers. :issue:`12393` by `Roman Yurchak`_.
+
 :mod:`sklearn.linear_model`
 ...........................
 
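The regression described in this entry is easiest to see with a small, hedged sketch: a vectorizer whose custom preprocessor only understands structured records, combined with a built-in stop word list. The record format and the custom_preprocessor helper below are invented for illustration; with the fix from scikit-learn#12393 the stop word consistency check no longer aborts the fit when it cannot apply such a preprocessor to the stop words themselves.

    from sklearn.feature_extraction.text import CountVectorizer

    # Hypothetical corpus: each document is a dict, so a custom preprocessor
    # is needed to pull the raw text out before tokenization.
    docs = [{"text": "The quick brown fox"},
            {"text": "jumps over the lazy dog"}]

    def custom_preprocessor(record):
        # Illustrative helper (not from the commit): extract and lowercase
        # the text field of a record.
        return record["text"].lower()

    vectorizer = CountVectorizer(preprocessor=custom_preprocessor,
                                 stop_words="english")

    # In 0.20.0 the stop word consistency check applied the preprocessor to
    # each stop word (a plain string), which raised for a dict-only
    # preprocessor; with this fix the check is skipped gracefully and the
    # fit succeeds.
    X = vectorizer.fit_transform(docs)
    print(X.shape)  # (2, number of non-stop-word terms)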

examples/linear_model/plot_iris_logistic.py

Lines changed: 1 addition & 1 deletion
@@ -30,7 +30,7 @@
 
 logreg = LogisticRegression(C=1e5, solver='lbfgs', multi_class='multinomial')
 
-# we create an instance of Neighbours Classifier and fit the data.
+# Create an instance of Logistic Regression Classifier and fit the data.
 logreg.fit(X, Y)
 
 # Plot the decision boundary. For that, we will assign a color to each

sklearn/cluster/tests/test_affinity_propagation.py

Lines changed: 3 additions & 3 deletions
@@ -7,7 +7,7 @@
 
 from sklearn.exceptions import ConvergenceWarning
 from sklearn.utils.testing import (
-    assert_equal, assert_false, assert_true, assert_array_equal, assert_raises,
+    assert_equal, assert_false, assert_array_equal, assert_raises,
     assert_warns, assert_warns_message, assert_no_warnings)
 
 from sklearn.cluster.affinity_propagation_ import AffinityPropagation
@@ -160,5 +160,5 @@ def test_equal_similarities_and_preferences():
     assert_false(_equal_similarities_and_preferences(S, np.array([0, 1])))
 
     # Same preferences
-    assert_true(_equal_similarities_and_preferences(S, np.array([0, 0])))
-    assert_true(_equal_similarities_and_preferences(S, np.array(0)))
+    assert _equal_similarities_and_preferences(S, np.array([0, 0]))
+    assert _equal_similarities_and_preferences(S, np.array(0))
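This file and the test modules below apply the same mechanical rewrite: assert_true(expr) from sklearn.utils.testing becomes a bare assert expr. A small, hedged sketch of why the plain form is preferred under pytest (the test function and values are illustrative, not taken from this commit):

    import numpy as np

    def test_preferences_are_equal():
        # Illustrative similarity matrix and preferences only.
        S = -np.array([[0., 1.], [1., 0.]])
        preferences = np.array([0, 0])

        # With a bare assert, pytest's assertion rewriting reports the
        # evaluated operands on failure; assert_true only reported that a
        # boolean was not True.
        assert preferences.min() == preferences.max()

        # The optional message form survives unchanged, mirroring the
        # assert_true(cond, msg) -> "assert cond, msg" rewrite used in
        # test_k_means.py further down.
        assert S.shape[0] == S.shape[1], "similarity matrix must be square"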

sklearn/cluster/tests/test_bicluster.py

Lines changed: 1 addition & 2 deletions
@@ -10,7 +10,6 @@
 from sklearn.utils.testing import assert_array_equal
 from sklearn.utils.testing import assert_array_almost_equal
 from sklearn.utils.testing import assert_raises
-from sklearn.utils.testing import assert_true
 from sklearn.utils.testing import SkipTest
 
 from sklearn.base import BaseEstimator, BiclusterMixin
@@ -51,7 +50,7 @@ def test_get_submatrix():
         submatrix[:] = -1
         if issparse(X):
             X = X.toarray()
-        assert_true(np.all(X != -1))
+        assert np.all(X != -1)
 
 
 def _test_shape_indices(model):

sklearn/cluster/tests/test_feature_agglomeration.py

Lines changed: 11 additions & 11 deletions
@@ -4,7 +4,7 @@
 # Authors: Sergul Aydore 2017
 import numpy as np
 from sklearn.cluster import FeatureAgglomeration
-from sklearn.utils.testing import assert_true, assert_no_warnings
+from sklearn.utils.testing import assert_no_warnings
 from sklearn.utils.testing import assert_array_almost_equal
 
 
@@ -18,24 +18,24 @@ def test_feature_agglomeration():
                                         pooling_func=np.median)
     assert_no_warnings(agglo_mean.fit, X)
     assert_no_warnings(agglo_median.fit, X)
-    assert_true(np.size(np.unique(agglo_mean.labels_)) == n_clusters)
-    assert_true(np.size(np.unique(agglo_median.labels_)) == n_clusters)
-    assert_true(np.size(agglo_mean.labels_) == X.shape[1])
-    assert_true(np.size(agglo_median.labels_) == X.shape[1])
+    assert np.size(np.unique(agglo_mean.labels_)) == n_clusters
+    assert np.size(np.unique(agglo_median.labels_)) == n_clusters
+    assert np.size(agglo_mean.labels_) == X.shape[1]
+    assert np.size(agglo_median.labels_) == X.shape[1]
 
     # Test transform
     Xt_mean = agglo_mean.transform(X)
     Xt_median = agglo_median.transform(X)
-    assert_true(Xt_mean.shape[1] == n_clusters)
-    assert_true(Xt_median.shape[1] == n_clusters)
-    assert_true(Xt_mean == np.array([1 / 3.]))
-    assert_true(Xt_median == np.array([0.]))
+    assert Xt_mean.shape[1] == n_clusters
+    assert Xt_median.shape[1] == n_clusters
+    assert Xt_mean == np.array([1 / 3.])
+    assert Xt_median == np.array([0.])
 
     # Test inverse transform
     X_full_mean = agglo_mean.inverse_transform(Xt_mean)
     X_full_median = agglo_median.inverse_transform(Xt_median)
-    assert_true(np.unique(X_full_mean[0]).size == n_clusters)
-    assert_true(np.unique(X_full_median[0]).size == n_clusters)
+    assert np.unique(X_full_mean[0]).size == n_clusters
+    assert np.unique(X_full_median[0]).size == n_clusters
 
     assert_array_almost_equal(agglo_mean.transform(X_full_mean),
                               Xt_mean)

sklearn/cluster/tests/test_hierarchical.py

Lines changed: 8 additions & 9 deletions
@@ -14,7 +14,6 @@
 from scipy import sparse
 from scipy.cluster import hierarchy
 
-from sklearn.utils.testing import assert_true
 from sklearn.utils.testing import assert_raises
 from sklearn.utils.testing import assert_equal
 from sklearn.utils.testing import assert_almost_equal
@@ -72,7 +71,7 @@ def test_structured_linkage_tree():
         children, n_components, n_leaves, parent = \
             tree_builder(X.T, connectivity)
         n_nodes = 2 * X.shape[1] - 1
-        assert_true(len(children) + n_leaves == n_nodes)
+        assert len(children) + n_leaves == n_nodes
     # Check that ward_tree raises a ValueError with a connectivity matrix
     # of the wrong shape
     assert_raises(ValueError,
@@ -114,7 +113,7 @@ def test_height_linkage_tree():
     for linkage_func in _TREE_BUILDERS.values():
         children, n_nodes, n_leaves, parent = linkage_func(X.T, connectivity)
         n_nodes = 2 * X.shape[1] - 1
-        assert_true(len(children) + n_leaves == n_nodes)
+        assert len(children) + n_leaves == n_nodes
 
 
 def test_agglomerative_clustering_wrong_arg_memory():
@@ -152,7 +151,7 @@ def test_agglomerative_clustering():
                                              linkage=linkage)
         clustering.fit(X)
         labels = clustering.labels_
-        assert_true(np.size(np.unique(labels)) == 10)
+        assert np.size(np.unique(labels)) == 10
     finally:
         shutil.rmtree(tempdir)
     # Turn caching off now
@@ -166,7 +165,7 @@ def test_agglomerative_clustering():
                         labels), 1)
     clustering.connectivity = None
     clustering.fit(X)
-    assert_true(np.size(np.unique(clustering.labels_)) == 10)
+    assert np.size(np.unique(clustering.labels_)) == 10
     # Check that we raise a TypeError on dense matrices
     clustering = AgglomerativeClustering(
         n_clusters=10,
@@ -226,12 +225,12 @@ def test_ward_agglomeration():
     connectivity = grid_to_graph(*mask.shape)
     agglo = FeatureAgglomeration(n_clusters=5, connectivity=connectivity)
     agglo.fit(X)
-    assert_true(np.size(np.unique(agglo.labels_)) == 5)
+    assert np.size(np.unique(agglo.labels_)) == 5
 
     X_red = agglo.transform(X)
-    assert_true(X_red.shape[1] == 5)
+    assert X_red.shape[1] == 5
     X_full = agglo.inverse_transform(X_red)
-    assert_true(np.unique(X_full[0]).size == 5)
+    assert np.unique(X_full[0]).size == 5
     assert_array_almost_equal(agglo.transform(X_full), X_red)
 
     # Check that fitting with no samples raises a ValueError
@@ -265,7 +264,7 @@ def assess_same_labelling(cut1, cut2):
         ecut = np.zeros((n, k))
         ecut[np.arange(n), cut] = 1
         co_clust.append(np.dot(ecut, ecut.T))
-    assert_true((co_clust[0] == co_clust[1]).all())
+    assert (co_clust[0] == co_clust[1]).all()
 
 
 def test_scikit_vs_scipy():

sklearn/cluster/tests/test_k_means.py

Lines changed: 6 additions & 7 deletions
@@ -13,7 +13,6 @@
 from sklearn.utils.testing import assert_almost_equal
 from sklearn.utils.testing import assert_raises
 from sklearn.utils.testing import assert_raises_regex
-from sklearn.utils.testing import assert_true
 from sklearn.utils.testing import assert_greater
 from sklearn.utils.testing import assert_less
 from sklearn.utils.testing import assert_warns
@@ -107,8 +106,8 @@ def test_labels_assignment_and_inertia():
         labels_gold[dist < mindist] = center_id
         mindist = np.minimum(dist, mindist)
     inertia_gold = mindist.sum()
-    assert_true((mindist >= 0.0).all())
-    assert_true((labels_gold != -1).all())
+    assert (mindist >= 0.0).all()
+    assert (labels_gold != -1).all()
 
     sample_weight = None
 
@@ -565,9 +564,9 @@ def test_k_means_non_collapsed():
     assert_equal(len(np.unique(km.labels_)), 3)
 
     centers = km.cluster_centers_
-    assert_true(np.linalg.norm(centers[0] - centers[1]) >= 0.1)
-    assert_true(np.linalg.norm(centers[0] - centers[2]) >= 0.1)
-    assert_true(np.linalg.norm(centers[1] - centers[2]) >= 0.1)
+    assert np.linalg.norm(centers[0] - centers[1]) >= 0.1
+    assert np.linalg.norm(centers[0] - centers[2]) >= 0.1
+    assert np.linalg.norm(centers[1] - centers[2]) >= 0.1
 
 
 @pytest.mark.parametrize('algo', ['full', 'elkan'])
@@ -689,7 +688,7 @@ def test_n_init():
     failure_msg = ("Inertia %r should be decreasing"
                    " when n_init is increasing.") % list(inertia)
     for i in range(len(n_init_range) - 1):
-        assert_true(inertia[i] >= inertia[i + 1], failure_msg)
+        assert inertia[i] >= inertia[i + 1], failure_msg
 
 
 def test_k_means_function():

0 commit comments
