[MRG] MNT Remove code deprecated in 0.18 by massich · Pull Request #10094 · scikit-learn/scikit-learn · GitHub

[MRG] MNT Remove code deprecated in 0.18 #10094


Closed · wants to merge 14 commits
15 changes: 3 additions & 12 deletions benchmarks/bench_plot_incremental_pca.py
@@ -13,7 +13,7 @@
from collections import defaultdict
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_lfw_people
from sklearn.decomposition import IncrementalPCA, RandomizedPCA, PCA
from sklearn.decomposition import IncrementalPCA, PCA


def plot_results(X, y, label):
@@ -37,7 +37,6 @@ def plot_feature_times(all_times, batch_size, all_components, data):
plot_results(all_components, all_times['pca'], label="PCA")
plot_results(all_components, all_times['ipca'],
label="IncrementalPCA, bsize=%i" % batch_size)
plot_results(all_components, all_times['rpca'], label="RandomizedPCA")
plt.legend(loc="upper left")
plt.suptitle("Algorithm runtime vs. n_components\n \
LFW, size %i x %i" % data.shape)
@@ -50,7 +49,6 @@ def plot_feature_errors(all_errors, batch_size, all_components, data):
plot_results(all_components, all_errors['pca'], label="PCA")
plot_results(all_components, all_errors['ipca'],
label="IncrementalPCA, bsize=%i" % batch_size)
plot_results(all_components, all_errors['rpca'], label="RandomizedPCA")
plt.legend(loc="lower left")
plt.suptitle("Algorithm error vs. n_components\n"
"LFW, size %i x %i" % data.shape)
@@ -61,7 +59,6 @@ def plot_feature_errors(all_errors, batch_size, all_components, data):
def plot_batch_times(all_times, n_features, all_batch_sizes, data):
plt.figure()
plot_results(all_batch_sizes, all_times['pca'], label="PCA")
plot_results(all_batch_sizes, all_times['rpca'], label="RandomizedPCA")
plot_results(all_batch_sizes, all_times['ipca'], label="IncrementalPCA")
plt.legend(loc="lower left")
plt.suptitle("Algorithm runtime vs. batch_size for n_components %i\n \
@@ -92,11 +89,9 @@ def fixed_batch_size_comparison(data):
all_errors = defaultdict(list)
for n_components in all_features:
pca = PCA(n_components=n_components)
rpca = RandomizedPCA(n_components=n_components, random_state=1999)
ipca = IncrementalPCA(n_components=n_components, batch_size=batch_size)
results_dict = {k: benchmark(est, data) for k, est in [('pca', pca),
('ipca', ipca),
('rpca', rpca)]}
('ipca', ipca)]}

for k in sorted(results_dict.keys()):
all_times[k].append(results_dict[k]['time'])
@@ -116,9 +111,7 @@ def variable_batch_size_comparison(data):
all_times = defaultdict(list)
all_errors = defaultdict(list)
pca = PCA(n_components=n_components)
rpca = RandomizedPCA(n_components=n_components, random_state=1999)
Review comment (Member):
Should we be reporting PCA(svd_solver='randomized') instead?

results_dict = {k: benchmark(est, data) for k, est in [('pca', pca),
('rpca', rpca)]}
results_dict = {k: benchmark(est, data) for k, est in [('pca', pca)]}

# Create flat baselines to compare the variation over batch size
all_times['pca'].extend([results_dict['pca']['time']] *
Expand All @@ -138,8 +131,6 @@ def variable_batch_size_comparison(data):
all_errors['ipca'].append(results_dict['ipca']['error'])

plot_batch_times(all_times, n_components, batch_sizes, data)
# RandomizedPCA error is always worse (approx 100x) than other PCA
# tests
plot_batch_errors(all_errors, n_components, batch_sizes, data)

faces = fetch_lfw_people(resize=.2, min_faces_per_person=5)
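Regarding the review comment above: a minimal sketch (not part of this diff) of how the randomized solver could still be benchmarked through the PCA estimator itself; the component count below is illustrative only.

from sklearn.datasets import fetch_lfw_people
from sklearn.decomposition import PCA

data = fetch_lfw_people(resize=.2, min_faces_per_person=5).data
# svd_solver='randomized' is the PCA option that replaces the deprecated RandomizedPCA estimator
rpca = PCA(n_components=32, svd_solver='randomized', random_state=1999)
rpca.fit(data)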
2 changes: 1 addition & 1 deletion doc/modules/model_evaluation.rst
@@ -104,7 +104,7 @@ Usage examples:
>>> model = svm.SVC()
>>> cross_val_score(model, X, y, scoring='wrong_choice')
Traceback (most recent call last):
ValueError: 'wrong_choice' is not a valid scoring value. Valid options are ['accuracy', 'adjusted_mutual_info_score', 'adjusted_rand_score', 'average_precision', 'balanced_accuracy', 'brier_score_loss', 'completeness_score', 'explained_variance', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'fowlkes_mallows_score', 'homogeneity_score', 'mutual_info_score', 'neg_log_loss', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_mean_squared_log_error', 'neg_median_absolute_error', 'normalized_mutual_info_score', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'r2', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'roc_auc', 'v_measure_score']
ValueError: 'wrong_choice' is not a valid scoring value. Valid options are ['accuracy', 'adjusted_mutual_info_score', 'adjusted_rand_score', 'average_precision', 'balanced_accuracy', 'brier_score_loss', 'completeness_score', 'explained_variance', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'fowlkes_mallows_score', 'homogeneity_score', 'mean_absolute_error', 'mean_squared_error', 'median_absolute_error', 'mutual_info_score', 'neg_log_loss', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_mean_squared_log_error', 'neg_median_absolute_error', 'normalized_mutual_info_score', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'r2', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'roc_auc', 'v_measure_score']

.. note::

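As an aside on this doc change: the list of valid scoring strings shown in the error message can also be obtained programmatically; a minimal sketch, assuming the sklearn.metrics.SCORERS mapping is available in this version.

>>> from sklearn.metrics import SCORERS
>>> sorted(SCORERS.keys())  # doctest: +ELLIPSIS
['accuracy', 'adjusted_mutual_info_score', ...]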
5 changes: 2 additions & 3 deletions doc/modules/preprocessing.rst
@@ -235,9 +235,8 @@ data.
independently, since a downstream model can further make some assumption
on the linear independence of the features.

To address this issue you can use :class:`sklearn.decomposition.PCA`
or :class:`sklearn.decomposition.RandomizedPCA` with ``whiten=True``
to further remove the linear correlation across features.
To address this issue you can use :class:`sklearn.decomposition.PCA` with
``whiten=True`` to further remove the linear correlation across features.

.. topic:: Scaling a 1D array

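A minimal sketch of the recommendation in the updated paragraph, using illustrative random data:

import numpy as np
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
X = rng.randn(100, 5)
# whiten=True rescales the projected components to unit variance,
# removing linear correlation across the output features
X_whitened = PCA(n_components=3, whiten=True).fit_transform(X)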
@@ -75,7 +75,7 @@

# Choose cross-validation techniques for the inner and outer loops,
# independently of the dataset.
# E.g "LabelKFold", "LeaveOneOut", "LeaveOneLabelOut", etc.
# E.g "GroupKFold", "LeaveOneOut", "LeaveOneGroupOut", etc.
inner_cv = KFold(n_splits=4, shuffle=True, random_state=i)
outer_cv = KFold(n_splits=4, shuffle=True, random_state=i)

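For reference, a minimal sketch of the group-aware splitter now named in the updated comment; the group labels below are illustrative only.

import numpy as np
from sklearn.model_selection import GroupKFold

X = np.arange(20).reshape(10, 2)
y = np.arange(10)
groups = np.repeat([0, 1, 2, 3, 4], 2)  # one group label per sample
for train_idx, test_idx in GroupKFold(n_splits=5).split(X, y, groups):
    pass  # samples from the same group never appear in both train and test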
13 changes: 6 additions & 7 deletions sklearn/__init__.py
@@ -135,15 +135,14 @@ def config_context(**new_config):
__check_build # avoid flakes unused variable error

__all__ = ['calibration', 'cluster', 'covariance', 'cross_decomposition',
'cross_validation', 'datasets', 'decomposition', 'dummy',
'ensemble', 'exceptions', 'externals', 'feature_extraction',
'feature_selection', 'gaussian_process', 'grid_search',
'isotonic', 'kernel_approximation', 'kernel_ridge',
'learning_curve', 'linear_model', 'manifold', 'metrics',
'datasets', 'decomposition', 'dummy', 'ensemble', 'exceptions',
'externals', 'feature_extraction', 'feature_selection',
'gaussian_process', 'isotonic', 'kernel_approximation',
'kernel_ridge', 'linear_model', 'manifold', 'metrics',
'mixture', 'model_selection', 'multiclass', 'multioutput',
'naive_bayes', 'neighbors', 'neural_network', 'pipeline',
'preprocessing', 'random_projection', 'semi_supervised',
'svm', 'tree', 'discriminant_analysis',
'preprocessing', 'random_projection', 'semi_supervised', 'svm',
'tree', 'discriminant_analysis',
# Non-modules:
'clone']
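The entries removed from __all__ correspond to modules deprecated in 0.18; their functionality lives in sklearn.model_selection. For example:

# Previously (deprecated in 0.18, removed by this PR):
#   from sklearn.cross_validation import cross_val_score
#   from sklearn.grid_search import GridSearchCV
#   from sklearn.learning_curve import learning_curve
from sklearn.model_selection import cross_val_score, GridSearchCV, learning_curve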
