Fix numpy vstack on generator expressions by ogrisel · Pull Request #12467 · scikit-learn/scikit-learn · GitHub
[go: up one dir, main page]

Skip to content

Fix numpy vstack on generator expressions #12467

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Oct 29, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions doc/whats_new/v0.20.rst
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,14 @@ Changelog
:class:`decomposition.IncrementalPCA` when using float32 datasets.
:issue:`12338` by :user:`bauks <bauks>`.

Miscellaneous
.............

- |Fix| Make sure to avoid raising ``FutureWarning`` when calling
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems that FutureWarning is only a proposal (not implemented) in numpy?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's pretty likely to be implemented. If not we can always update our changelog later.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you give me some pointers about where the proposal/discussion about FutureWarning and VisibleDeprecationWarning in numpy is happening?

It'll affect the semantics behind sklearn.utils.testing.assert_no_warnings: d6972a9

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

see numpy/numpy#12263 mentioned above.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The FutureWarning PR has been merged into numpy master as well: https://github.com/numpy/numpy/pull/12280/files

``np.vstack`` with numpy 1.16 and later (use list comprehensions
instead of generator expressions in many locations of the scikit-learn
code base). :issue:`12467` by :user:`Olivier Grisel`.

.. _changes_0_20:

Version 0.20.0
Expand Down
20 changes: 10 additions & 10 deletions sklearn/cluster/bicluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,10 +305,10 @@ def _fit(self, X):
self.row_labels_ = labels[:n_rows]
self.column_labels_ = labels[n_rows:]

self.rows_ = np.vstack(self.row_labels_ == c
for c in range(self.n_clusters))
self.columns_ = np.vstack(self.column_labels_ == c
for c in range(self.n_clusters))
self.rows_ = np.vstack([self.row_labels_ == c
for c in range(self.n_clusters)])
self.columns_ = np.vstack([self.column_labels_ == c
for c in range(self.n_clusters)])


class SpectralBiclustering(BaseSpectral):
Expand Down Expand Up @@ -504,12 +504,12 @@ def _fit(self, X):
self.column_labels_ = self._project_and_cluster(X.T, best_ut.T,
n_col_clusters)

self.rows_ = np.vstack(self.row_labels_ == label
for label in range(n_row_clusters)
for _ in range(n_col_clusters))
self.columns_ = np.vstack(self.column_labels_ == label
for _ in range(n_row_clusters)
for label in range(n_col_clusters))
self.rows_ = np.vstack([self.row_labels_ == label
for label in range(n_row_clusters)
for _ in range(n_col_clusters)])
self.columns_ = np.vstack([self.column_labels_ == label
for _ in range(n_row_clusters)
for label in range(n_col_clusters)])

def _fit_best_piecewise(self, vectors, n_best, n_clusters):
"""Find the ``n_best`` vectors that are best approximated by piecewise
Expand Down
24 changes: 12 additions & 12 deletions sklearn/datasets/samples_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -629,8 +629,8 @@ def make_circles(n_samples=100, shuffle=True, noise=None, random_state=None,
inner_circ_x = np.cos(linspace_in) * factor
inner_circ_y = np.sin(linspace_in) * factor

X = np.vstack((np.append(outer_circ_x, inner_circ_x),
np.append(outer_circ_y, inner_circ_y))).T
X = np.vstack([np.append(outer_circ_x, inner_circ_x),
np.append(outer_circ_y, inner_circ_y)]).T
y = np.hstack([np.zeros(n_samples_out, dtype=np.intp),
np.ones(n_samples_in, dtype=np.intp)])
if shuffle:
Expand Down Expand Up @@ -683,8 +683,8 @@ def make_moons(n_samples=100, shuffle=True, noise=None, random_state=None):
inner_circ_x = 1 - np.cos(np.linspace(0, np.pi, n_samples_in))
inner_circ_y = 1 - np.sin(np.linspace(0, np.pi, n_samples_in)) - .5

X = np.vstack((np.append(outer_circ_x, inner_circ_x),
np.append(outer_circ_y, inner_circ_y))).T
X = np.vstack([np.append(outer_circ_x, inner_circ_x),
np.append(outer_circ_y, inner_circ_y)]).T
y = np.hstack([np.zeros(n_samples_out, dtype=np.intp),
np.ones(n_samples_in, dtype=np.intp)])

Expand Down Expand Up @@ -1593,8 +1593,8 @@ def make_biclusters(shape, n_clusters, noise=0.0, minval=10,
row_labels = row_labels[row_idx]
col_labels = col_labels[col_idx]

rows = np.vstack(row_labels == c for c in range(n_clusters))
cols = np.vstack(col_labels == c for c in range(n_clusters))
rows = np.vstack([row_labels == c for c in range(n_clusters)])
cols = np.vstack([col_labels == c for c in range(n_clusters)])

return result, rows, cols

Expand Down Expand Up @@ -1689,11 +1689,11 @@ def make_checkerboard(shape, n_clusters, noise=0.0, minval=10,
row_labels = row_labels[row_idx]
col_labels = col_labels[col_idx]

rows = np.vstack(row_labels == label
for label in range(n_row_clusters)
for _ in range(n_col_clusters))
cols = np.vstack(col_labels == label
for _ in range(n_row_clusters)
for label in range(n_col_clusters))
rows = np.vstack([row_labels == label
for label in range(n_row_clusters)
for _ in range(n_col_clusters)])
cols = np.vstack([col_labels == label
for _ in range(n_row_clusters)
for label in range(n_col_clusters)])

return result, rows, cols
4 changes: 2 additions & 2 deletions sklearn/dummy.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,8 +220,8 @@ def predict(self, X):
k in range(self.n_outputs_)], [n_samples, 1])

elif self.strategy == "stratified":
y = np.vstack(classes_[k][proba[k].argmax(axis=1)] for
k in range(self.n_outputs_)).T
y = np.vstack([classes_[k][proba[k].argmax(axis=1)] for
k in range(self.n_outputs_)]).T

elif self.strategy == "uniform":
ret = [classes_[k][rs.randint(n_classes_[k], size=n_samples)]
Expand Down
4 changes: 2 additions & 2 deletions sklearn/linear_model/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,8 +478,8 @@ def fit(self, X, y, sample_weight=None):
outs = Parallel(n_jobs=n_jobs_)(
delayed(sparse_lsqr)(X, y[:, j].ravel())
for j in range(y.shape[1]))
self.coef_ = np.vstack(out[0] for out in outs)
self._residues = np.vstack(out[3] for out in outs)
self.coef_ = np.vstack([out[0] for out in outs])
self._residues = np.vstack([out[3] for out in outs])
else:
self.coef_, self._residues, self.rank_, self.singular_ = \
linalg.lstsq(X, y)
Expand Down
2 changes: 1 addition & 1 deletion sklearn/metrics/scorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ def __call__(self, clf, X, y, sample_weight=None):

# For multi-output multi-class estimator
if isinstance(y_pred, list):
y_pred = np.vstack(p for p in y_pred).T
y_pred = np.vstack([p for p in y_pred]).T

except (NotImplementedError, AttributeError):
y_pred = clf.predict_proba(X)
Expand Down
4 changes: 2 additions & 2 deletions sklearn/metrics/tests/test_score_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,7 @@ def test_thresholded_scorers_multilabel_indicator_data():
clf.fit(X_train, y_train)
y_proba = clf.predict_proba(X_test)
score1 = get_scorer('roc_auc')(clf, X_test, y_test)
score2 = roc_auc_score(y_test, np.vstack(p[:, -1] for p in y_proba).T)
score2 = roc_auc_score(y_test, np.vstack([p[:, -1] for p in y_proba]).T)
assert_almost_equal(score1, score2)

# Multi-output multi-class decision_function
Expand All @@ -398,7 +398,7 @@ def test_thresholded_scorers_multilabel_indicator_data():

y_proba = clf.decision_function(X_test)
score1 = get_scorer('roc_auc')(clf, X_test, y_test)
score2 = roc_auc_score(y_test, np.vstack(p for p in y_proba).T)
score2 = roc_auc_score(y_test, np.vstack([p for p in y_proba]).T)
assert_almost_equal(score1, score2)

# Multilabel predict_proba
Expand Down
4 changes: 2 additions & 2 deletions sklearn/neighbors/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,10 +420,10 @@ class from an array representing our data set and ask who's
kwds = ({'squared': True} if self.effective_metric_ == 'euclidean'
else self.effective_metric_params_)

result = pairwise_distances_chunked(
result = list(pairwise_distances_chunked(
X, self._fit_X, reduce_func=reduce_func,
metric=self.effective_metric_, n_jobs=n_jobs,
**kwds)
**kwds))

elif self._fit_method in ['ball_tree', 'kd_tree']:
if issparse(X):
Expand Down
4 changes: 2 additions & 2 deletions sklearn/preprocessing/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -1382,8 +1382,8 @@ def powers_(self):
combinations = self._combinations(self.n_input_features_, self.degree,
self.interaction_only,
self.include_bias)
return np.vstack(np.bincount(c, minlength=self.n_input_features_)
for c in combinations)
return np.vstack([np.bincount(c, minlength=self.n_input_features_)
for c in combinations])

def get_feature_names(self, input_features=None):
"""
Expand Down
0