From 79fa25ade3f9f27d2d7ade7830686609c8d5e9f4 Mon Sep 17 00:00:00 2001
From: Andreas Mueller
Date: Thu, 15 Oct 2015 15:23:00 -0400
Subject: [PATCH 1/2] MAINT don't print things in testing.

---
 sklearn/mixture/tests/test_gmm.py | 1 -
 sklearn/utils/estimator_checks.py | 8 ++++----
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/sklearn/mixture/tests/test_gmm.py b/sklearn/mixture/tests/test_gmm.py
index dd3a4380ffca3..7d79c3c12abd6 100644
--- a/sklearn/mixture/tests/test_gmm.py
+++ b/sklearn/mixture/tests/test_gmm.py
@@ -54,7 +54,6 @@ def test_sample_gaussian():
     from sklearn.mixture import sample_gaussian
     x = sample_gaussian([0, 0], [[4, 3], [1, .1]],
                         covariance_type='full', random_state=42)
-    print(x)
     assert_true(np.isfinite(x).all())
 
 
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 3828f85bcc70f..e87da57554ff5 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -38,7 +38,7 @@
 from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
 from sklearn.random_projection import BaseRandomProjection
 from sklearn.feature_selection import SelectKBest
-from sklearn.svm.base import BaseLibSVM, BaseSVC
+from sklearn.svm.base import BaseLibSVM
 from sklearn.pipeline import make_pipeline
 from sklearn.decomposition import NMF, ProjectedGradientNMF
 from sklearn.utils.validation import DataConversionWarning
@@ -479,7 +479,7 @@ def check_fit1d_1sample(name, Estimator):
 
     try:
         estimator.fit(X, y)
-    except ValueError :
+    except ValueError:
         pass
 
 
@@ -1158,7 +1158,6 @@ def check_regressors_train(name, Regressor):
     # and furthermore assumes the presence of outliers, hence
     # skipped
     if name not in ('PLSCanonical', 'CCA', 'RANSACRegressor'):
-        print(regressor)
         assert_greater(regressor.score(X, y_), 0.5)
 
 
@@ -1501,6 +1500,7 @@ def fit(self, X, y):
     assert_true(all(item in deep_params.items() for item in
                     shallow_params.items()))
 
+
 def check_classifiers_regression_target(name, Estimator):
     # Check if classifier throws an exception when fed regression targets
 
@@ -1508,4 +1508,4 @@ def check_classifiers_regression_target(name, Estimator):
     X, y = boston.data, boston.target
     e = Estimator()
     msg = 'Unknown label type: '
-    assert_raises_regex(ValueError, msg, e.fit, X, y)
\ No newline at end of file
+    assert_raises_regex(ValueError, msg, e.fit, X, y)
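Why the second patch matters: parameters such as init, precompute, Gram, multioutput, and selected accept either a string flag or an array, and NumPy applies == elementwise, so comparing such a parameter directly against a string misbehaves whenever an array is passed. A minimal sketch of the pitfall, not code from the patch (the exact outcome depends on the NumPy version):

    import numpy as np

    # A caller passes explicit initial centers instead of a string flag.
    init = np.array([[0.0, 0.0], [1.0, 1.0]])

    # NumPy evaluates `==` elementwise. Against a string this has, depending
    # on the NumPy version, returned a bare False with a FutureWarning
    # ("elementwise comparison failed"), or produced a boolean array whose
    # truth value is ambiguous inside `if`, raising a ValueError.
    if init == 'k-means++':
        pass  # the branch a caller passing an array never meant to reach
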
From c66689c4f421d5ae472acd3ee426f5d82d5ca780 Mon Sep 17 00:00:00 2001
From: Andreas Mueller
Date: Thu, 15 Oct 2015 16:07:17 -0400
Subject: [PATCH 2/2] FIX Don't compare arrays to strings!!!!

---
 sklearn/cluster/k_means_.py                |  7 +--
 sklearn/linear_model/base.py               | 14 +++---
 sklearn/linear_model/coordinate_descent.py |  3 +-
 sklearn/linear_model/least_angle.py        |  3 +-
 sklearn/metrics/regression.py              | 52 ++++++++++++----------
 sklearn/preprocessing/data.py              |  2 +-
 6 files changed, 44 insertions(+), 37 deletions(-)

diff --git a/sklearn/cluster/k_means_.py b/sklearn/cluster/k_means_.py
index d490db353d143..dd9058d966e85 100644
--- a/sklearn/cluster/k_means_.py
+++ b/sklearn/cluster/k_means_.py
@@ -31,6 +31,7 @@
 from ..utils.random import choice
 from ..externals.joblib import Parallel
 from ..externals.joblib import delayed
+from ..externals.six import string_types
 from . import _k_means
 
 
@@ -269,7 +270,7 @@ def k_means(X, n_clusters, init='k-means++', precompute_distances='auto',
 
     if max_iter <= 0:
         raise ValueError('Number of iterations should be a positive number,'
-                        ' got %d instead' % max_iter)
+                         ' got %d instead' % max_iter)
 
     best_inertia = np.infty
     X = as_float_array(X, copy=copy_x)
@@ -634,10 +635,10 @@ def _init_centroids(X, k, init, random_state=None, x_squared_norms=None,
         raise ValueError(
             "n_samples=%d should be larger than k=%d" % (n_samples, k))
 
-    if init == 'k-means++':
+    if isinstance(init, string_types) and init == 'k-means++':
         centers = _k_init(X, k, random_state=random_state,
                           x_squared_norms=x_squared_norms)
-    elif init == 'random':
+    elif isinstance(init, string_types) and init == 'random':
         seeds = random_state.permutation(n_samples)[:k]
         centers = X[seeds]
     elif hasattr(init, '__array__'):
diff --git a/sklearn/linear_model/base.py b/sklearn/linear_model/base.py
index 3600735c78394..af107433ac408 100644
--- a/sklearn/linear_model/base.py
+++ b/sklearn/linear_model/base.py
@@ -34,13 +34,13 @@
 from ..utils.seq_dataset import ArrayDataset, CSRDataset
 
 
-###
-### TODO: intercept for all models
-### We should define a common function to center data instead of
-### repeating the same code inside each fit method.
+#
+# TODO: intercept for all models
+# We should define a common function to center data instead of
+# repeating the same code inside each fit method.
 
-### TODO: bayesian_ridge_regression and bayesian_regression_ard
-### should be squashed into its respective objects.
+# TODO: bayesian_ridge_regression and bayesian_regression_ard
+# should be squashed into its respective objects.
 
 SPARSE_INTERCEPT_DECAY = 0.01
 # For sparse data intercept updates are scaled by this decay factor to avoid
@@ -474,7 +474,7 @@ def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy):
         Xy = None
 
     # precompute if n_samples > n_features
-    if precompute == 'auto':
+    if isinstance(precompute, six.string_types) and precompute == 'auto':
         precompute = (n_samples > n_features)
 
     if precompute is True:
diff --git a/sklearn/linear_model/coordinate_descent.py b/sklearn/linear_model/coordinate_descent.py
index 13973369c1f6a..07bcedc249ad8 100644
--- a/sklearn/linear_model/coordinate_descent.py
+++ b/sklearn/linear_model/coordinate_descent.py
@@ -644,7 +644,8 @@ def fit(self, X, y, check_input=True):
                           "well. You are advised to use the LinearRegression "
                           "estimator", stacklevel=2)
 
-        if self.precompute == 'auto':
+        if (isinstance(self.precompute, six.string_types)
+                and self.precompute == 'auto'):
             warnings.warn("Setting precompute to 'auto', was found to be "
                           "slower even when n_samples > n_features. Hence "
                           "it will be removed in 0.18.",
diff --git a/sklearn/linear_model/least_angle.py b/sklearn/linear_model/least_angle.py
index d1898aee9e98c..b27d60fefef45 100644
--- a/sklearn/linear_model/least_angle.py
+++ b/sklearn/linear_model/least_angle.py
@@ -26,6 +26,7 @@
 from ..utils import ConvergenceWarning
 from ..externals.joblib import Parallel, delayed
 from ..externals.six.moves import xrange
+from ..externals.six import string_types
 import scipy
 
 solve_triangular_args = {}
@@ -179,7 +180,7 @@ def lars_path(X, y, Xy=None, Gram=None, max_iter=500,
         # speeds up the calculation of the (partial) Gram matrix
         # and allows to easily swap columns
         X = X.copy('F')
-    elif Gram == 'auto':
+    elif isinstance(Gram, string_types) and Gram == 'auto':
         Gram = None
         if X.shape[0] > X.shape[1]:
             Gram = np.dot(X.T, X)
diff --git a/sklearn/metrics/regression.py b/sklearn/metrics/regression.py
index 462349d5bd892..1a19e013e4bab 100644
--- a/sklearn/metrics/regression.py
+++ b/sklearn/metrics/regression.py
@@ -26,6 +26,7 @@
 
 from ..utils.validation import check_array, check_consistent_length
 from ..utils.validation import column_or_1d
+from ..externals.six import string_types
 import warnings
 
@@ -162,11 +163,12 @@ def mean_absolute_error(y_true, y_pred,
         y_true, y_pred, multioutput)
     output_errors = np.average(np.abs(y_pred - y_true),
                                weights=sample_weight, axis=0)
-    if multioutput == 'raw_values':
-        return output_errors
-    elif multioutput == 'uniform_average':
-        # pass None as weights to np.average: uniform mean
-        multioutput = None
+    if isinstance(multioutput, string_types):
+        if multioutput == 'raw_values':
+            return output_errors
+        elif multioutput == 'uniform_average':
+            # pass None as weights to np.average: uniform mean
+            multioutput = None
     return np.average(output_errors, weights=multioutput)
 
 
@@ -229,11 +231,12 @@ def mean_squared_error(y_true, y_pred,
         y_true, y_pred, multioutput)
     output_errors = np.average((y_true - y_pred) ** 2, axis=0,
                                weights=sample_weight)
-    if multioutput == 'raw_values':
-        return output_errors
-    elif multioutput == 'uniform_average':
-        # pass None as weights to np.average: uniform mean
-        multioutput = None
+    if isinstance(multioutput, string_types):
+        if multioutput == 'raw_values':
+            return output_errors
+        elif multioutput == 'uniform_average':
+            # pass None as weights to np.average: uniform mean
+            multioutput = None
     return np.average(output_errors, weights=multioutput)
 
 
@@ -464,20 +467,21 @@ def r2_score(y_true, y_pred,
                       "to 'uniform_average' in 0.18.",
                       DeprecationWarning)
         multioutput = 'variance_weighted'
-    if multioutput == 'raw_values':
-        # return scores individually
-        return output_scores
-    elif multioutput == 'uniform_average':
-        # passing None as weights results is uniform mean
-        avg_weights = None
-    elif multioutput == 'variance_weighted':
-        avg_weights = denominator
-        # avoid fail on constant y or one-element arrays
-        if not np.any(nonzero_denominator):
-            if not np.any(nonzero_numerator):
-                return 1.0
-            else:
-                return 0.0
+    if isinstance(multioutput, string_types):
+        if multioutput == 'raw_values':
+            # return scores individually
+            return output_scores
+        elif multioutput == 'uniform_average':
+            # passing None as weights results is uniform mean
+            avg_weights = None
+        elif multioutput == 'variance_weighted':
+            avg_weights = denominator
+            # avoid fail on constant y or one-element arrays
+            if not np.any(nonzero_denominator):
+                if not np.any(nonzero_numerator):
+                    return 1.0
+                else:
+                    return 0.0
     else:
         avg_weights = multioutput
 
diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py
index eb7d7a1fb95f7..b3262c41eb490 100644
--- a/sklearn/preprocessing/data.py
+++ b/sklearn/preprocessing/data.py
@@ -1553,7 +1553,7 @@ def _transform_selected(X, transform, selected="all", copy=True):
     -------
     X : array or sparse matrix, shape=(n_samples, n_features_new)
     """
-    if selected == "all":
+    if isinstance(selected, six.string_types) and selected == "all":
        return transform(X)
 
     X = check_array(X, accept_sparse='csc', copy=copy, dtype=FLOAT_DTYPES)
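The remedy is the same idiom in all six files: test the parameter's type before comparing it to a string, so array inputs short-circuit past the comparison. A standalone sketch of the pattern, not scikit-learn code (resolve_precompute is a hypothetical helper; string_types comes from six, as in the patch, and is simply str on Python 3):

    import numpy as np
    from six import string_types  # the patch imports this via sklearn.externals.six

    def resolve_precompute(precompute, n_samples, n_features):
        # Hypothetical helper: normalize a parameter that may be the string
        # 'auto', a boolean, or a precomputed Gram matrix.
        if isinstance(precompute, string_types) and precompute == 'auto':
            # `and` short-circuits, so an ndarray never reaches the string
            # comparison on the right.
            return n_samples > n_features
        if isinstance(precompute, np.ndarray):
            return precompute  # the caller supplied the Gram matrix itself
        return bool(precompute)

Because the isinstance guard is evaluated first, array inputs never trigger the elementwise comparison, which is why the patch adds the guard inline rather than reordering the branches.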