Fix numpy vstack on generator expressions (#12467) · scikit-learn/scikit-learn@e67e30c · GitHub
[go: up one dir, main page]

Skip to content

Commit e67e30c

Browse files
ogrisel authored and amueller committed
Fix numpy vstack on generator expressions (#12467)
* Workaround vstack issue with genxp
* Use list comprehensions instead of genexps with np.vstack
* Add changelog entry.
1 parent 338f763 commit e67e30c

File tree

9 files changed

+41
-33
lines changed

9 files changed

+41
-33
lines changed

doc/whats_new/v0.20.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,14 @@ Changelog
103103
:class:`decomposition.IncrementalPCA` when using float32 datasets.
104104
:issue:`12338` by :user:`bauks <bauks>`.
105105

106+
Miscellaneous
107+
.............
108+
109+
- |Fix| Make sure to avoid raising ``FutureWarning`` when calling
110+
``np.vstack`` with numpy 1.16 and later (use list comprehensions
111+
instead of generator expressions in many locations of the scikit-learn
112+
code base). :issue:`12467` by :user:`Olivier Grisel`.
113+
106114
.. _changes_0_20:
107115

108116
Version 0.20.0

sklearn/cluster/bicluster.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -305,10 +305,10 @@ def _fit(self, X):
305305
self.row_labels_ = labels[:n_rows]
306306
self.column_labels_ = labels[n_rows:]
307307

308-
self.rows_ = np.vstack(self.row_labels_ == c
309-
for c in range(self.n_clusters))
310-
self.columns_ = np.vstack(self.column_labels_ == c
311-
for c in range(self.n_clusters))
308+
self.rows_ = np.vstack([self.row_labels_ == c
309+
for c in range(self.n_clusters)])
310+
self.columns_ = np.vstack([self.column_labels_ == c
311+
for c in range(self.n_clusters)])
312312

313313

314314
class SpectralBiclustering(BaseSpectral):
@@ -504,12 +504,12 @@ def _fit(self, X):
504504
self.column_labels_ = self._project_and_cluster(X.T, best_ut.T,
505505
n_col_clusters)
506506

507-
self.rows_ = np.vstack(self.row_labels_ == label
508-
for label in range(n_row_clusters)
509-
for _ in range(n_col_clusters))
510-
self.columns_ = np.vstack(self.column_labels_ == label
511-
for _ in range(n_row_clusters)
512-
for label in range(n_col_clusters))
507+
self.rows_ = np.vstack([self.row_labels_ == label
508+
for label in range(n_row_clusters)
509+
for _ in range(n_col_clusters)])
510+
self.columns_ = np.vstack([self.column_labels_ == label
511+
for _ in range(n_row_clusters)
512+
for label in range(n_col_clusters)])
513513

514514
def _fit_best_piecewise(self, vectors, n_best, n_clusters):
515515
"""Find the ``n_best`` vectors that are best approximated by piecewise

sklearn/datasets/samples_generator.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -629,8 +629,8 @@ def make_circles(n_samples=100, shuffle=True, noise=None, random_state=None,
629629
inner_circ_x = np.cos(linspace_in) * factor
630630
inner_circ_y = np.sin(linspace_in) * factor
631631

632-
X = np.vstack((np.append(outer_circ_x, inner_circ_x),
633-
np.append(outer_circ_y, inner_circ_y))).T
632+
X = np.vstack([np.append(outer_circ_x, inner_circ_x),
633+
np.append(outer_circ_y, inner_circ_y)]).T
634634
y = np.hstack([np.zeros(n_samples_out, dtype=np.intp),
635635
np.ones(n_samples_in, dtype=np.intp)])
636636
if shuffle:
@@ -683,8 +683,8 @@ def make_moons(n_samples=100, shuffle=True, noise=None, random_state=None):
683683
inner_circ_x = 1 - np.cos(np.linspace(0, np.pi, n_samples_in))
684684
inner_circ_y = 1 - np.sin(np.linspace(0, np.pi, n_samples_in)) - .5
685685

686-
X = np.vstack((np.append(outer_circ_x, inner_circ_x),
687-
np.append(outer_circ_y, inner_circ_y))).T
686+
X = np.vstack([np.append(outer_circ_x, inner_circ_x),
687+
np.append(outer_circ_y, inner_circ_y)]).T
688688
y = np.hstack([np.zeros(n_samples_out, dtype=np.intp),
689689
np.ones(n_samples_in, dtype=np.intp)])
690690

@@ -1593,8 +1593,8 @@ def make_biclusters(shape, n_clusters, noise=0.0, minval=10,
15931593
row_labels = row_labels[row_idx]
15941594
col_labels = col_labels[col_idx]
15951595

1596-
rows = np.vstack(row_labels == c for c in range(n_clusters))
1597-
cols = np.vstack(col_labels == c for c in range(n_clusters))
1596+
rows = np.vstack([row_labels == c for c in range(n_clusters)])
1597+
cols = np.vstack([col_labels == c for c in range(n_clusters)])
15981598

15991599
return result, rows, cols
16001600

@@ -1689,11 +1689,11 @@ def make_checkerboard(shape, n_clusters, noise=0.0, minval=10,
16891689
row_labels = row_labels[row_idx]
16901690
col_labels = col_labels[col_idx]
16911691

1692-
rows = np.vstack(row_labels == label
1693-
for label in range(n_row_clusters)
1694-
for _ in range(n_col_clusters))
1695-
cols = np.vstack(col_labels == label
1696-
for _ in range(n_row_clusters)
1697-
for label in range(n_col_clusters))
1692+
rows = np.vstack([row_labels == label
1693+
for label in range(n_row_clusters)
1694+
for _ in range(n_col_clusters)])
1695+
cols = np.vstack([col_labels == label
1696+
for _ in range(n_row_clusters)
1697+
for label in range(n_col_clusters)])
16981698

16991699
return result, rows, cols

sklearn/dummy.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -220,8 +220,8 @@ def predict(self, X):
220220
k in range(self.n_outputs_)], [n_samples, 1])
221221

222222
elif self.strategy == "stratified":
223-
y = np.vstack(classes_[k][proba[k].argmax(axis=1)] for
224-
k in range(self.n_outputs_)).T
223+
y = np.vstack([classes_[k][proba[k].argmax(axis=1)] for
224+
k in range(self.n_outputs_)]).T
225225

226226
elif self.strategy == "uniform":
227227
ret = [classes_[k][rs.randint(n_classes_[k], size=n_samples)]

sklearn/linear_model/base.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -478,8 +478,8 @@ def fit(self, X, y, sample_weight=None):
478478
outs = Parallel(n_jobs=n_jobs_)(
479479
delayed(sparse_lsqr)(X, y[:, j].ravel())
480480
for j in range(y.shape[1]))
481-
self.coef_ = np.vstack(out[0] for out in outs)
482-
self._residues = np.vstack(out[3] for out in outs)
481+
self.coef_ = np.vstack([out[0] for out in outs])
482+
self._residues = np.vstack([out[3] for out in outs])
483483
else:
484484
self.coef_, self._residues, self.rank_, self.singular_ = \
485485
linalg.lstsq(X, y)

sklearn/metrics/scorer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ def __call__(self, clf, X, y, sample_weight=None):
177177

178178
# For multi-output multi-class estimator
179179
if isinstance(y_pred, list):
180-
y_pred = np.vstack(p for p in y_pred).T
180+
y_pred = np.vstack([p for p in y_pred]).T
181181

182182
except (NotImplementedError, AttributeError):
183183
y_pred = clf.predict_proba(X)

sklearn/metrics/tests/test_score_objects.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -385,7 +385,7 @@ def test_thresholded_scorers_multilabel_indicator_data():
385385
clf.fit(X_train, y_train)
386386
y_proba = clf.predict_proba(X_test)
387387
score1 = get_scorer('roc_auc')(clf, X_test, y_test)
388-
score2 = roc_auc_score(y_test, np.vstack(p[:, -1] for p in y_proba).T)
388+
score2 = roc_auc_score(y_test, np.vstack([p[:, -1] for p in y_proba]).T)
389389
assert_almost_equal(score1, score2)
390390

391391
# Multi-output multi-class decision_function
@@ -398,7 +398,7 @@ def test_thresholded_scorers_multilabel_indicator_data():
398398

399399
y_proba = clf.decision_function(X_test)
400400
score1 = get_scorer('roc_auc')(clf, X_test, y_test)
401-
score2 = roc_auc_score(y_test, np.vstack(p for p in y_proba).T)
401+
score2 = roc_auc_score(y_test, np.vstack([p for p in y_proba]).T)
402402
assert_almost_equal(score1, score2)
403403

404404
# Multilabel predict_proba

sklearn/neighbors/base.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -420,10 +420,10 @@ class from an array representing our data set and ask who's
420420
kwds = ({'squared': True} if self.effective_metric_ == 'euclidean'
421421
else self.effective_metric_params_)
422422

423-
result = pairwise_distances_chunked(
423+
result = list(pairwise_distances_chunked(
424424
X, self._fit_X, reduce_func=reduce_func,
425425
metric=self.effective_metric_, n_jobs=n_jobs,
426-
**kwds)
426+
**kwds))
427427

428428
elif self._fit_method in ['ball_tree', 'kd_tree']:
429429
if issparse(X):

sklearn/preprocessing/data.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1382,8 +1382,8 @@ def powers_(self):
13821382
combinations = self._combinations(self.n_input_features_, self.degree,
13831383
self.interaction_only,
13841384
self.include_bias)
1385-
return np.vstack(np.bincount(c, minlength=self.n_input_features_)
1386-
for c in combinations)
1385+
return np.vstack([np.bincount(c, minlength=self.n_input_features_)
1386+
for c in combinations])
13871387

13881388
def get_feature_names(self, input_features=None):
13891389
"""

0 commit comments

Comments
 (0)
0