Cleanup tests by amueller · Pull Request #5413 · scikit-learn/scikit-learn

Cleanup tests #5413


Merged · 2 commits · Oct 16, 2015
Changes from all commits
sklearn/cluster/k_means_.py (4 additions, 3 deletions)

@@ -31,6 +31,7 @@
 from ..utils.random import choice
 from ..externals.joblib import Parallel
 from ..externals.joblib import delayed
+from ..externals.six import string_types

 from . import _k_means

@@ -269,7 +270,7 @@ def k_means(X, n_clusters, init='k-means++', precompute_distances='auto',

     if max_iter <= 0:
         raise ValueError('Number of iterations should be a positive number,'
-                        ' got %d instead' % max_iter)
+                         ' got %d instead' % max_iter)

     best_inertia = np.infty
     X = as_float_array(X, copy=copy_x)
@@ -634,10 +635,10 @@ def _init_centroids(X, k, init, random_state=None, x_squared_norms=None,
         raise ValueError(
             "n_samples=%d should be larger than k=%d" % (n_samples, k))

-    if init == 'k-means++':
+    if isinstance(init, string_types) and init == 'k-means++':
         centers = _k_init(X, k, random_state=random_state,
                           x_squared_norms=x_squared_norms)
-    elif init == 'random':
+    elif isinstance(init, string_types) and init == 'random':
         seeds = random_state.permutation(n_samples)[:k]
         centers = X[seeds]
     elif hasattr(init, '__array__'):
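The guards added to `_init_centroids` exist because `init` may also be a user-supplied ndarray of initial centers, and comparing an ndarray to a string is not a plain scalar comparison in NumPy (depending on the version it may warn or broadcast elementwise). A minimal sketch of the pattern, using the same vendored `six` import the diff adds (valid for scikit-learn releases of this era):

```python
import numpy as np
from sklearn.externals.six import string_types  # vendored six, as in the diff

init = np.array([[0., 0.], [1., 1.]])  # user-supplied initial centers

# Unsafe: `init == 'k-means++'` asks NumPy to compare an array with a
# string, which may emit a FutureWarning instead of returning a bool.
# Safe: isinstance() short-circuits, so arrays never reach the string
# comparison.
if isinstance(init, string_types) and init == 'k-means++':
    print("use k-means++ seeding")
else:
    print("treat init as an array of explicit centers")
```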
sklearn/linear_model/base.py (7 additions, 7 deletions)

@@ -34,13 +34,13 @@
 from ..utils.seq_dataset import ArrayDataset, CSRDataset


-###
-### TODO: intercept for all models
-### We should define a common function to center data instead of
-### repeating the same code inside each fit method.
+#
+# TODO: intercept for all models
+# We should define a common function to center data instead of
+# repeating the same code inside each fit method.

-### TODO: bayesian_ridge_regression and bayesian_regression_ard
-### should be squashed into its respective objects.
+# TODO: bayesian_ridge_regression and bayesian_regression_ard
+# should be squashed into its respective objects.

 SPARSE_INTERCEPT_DECAY = 0.01
 # For sparse data intercept updates are scaled by this decay factor to avoid
@@ -474,7 +474,7 @@ def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy):
         Xy = None

     # precompute if n_samples > n_features
-    if precompute == 'auto':
+    if isinstance(precompute, six.string_types) and precompute == 'auto':
         precompute = (n_samples > n_features)

     if precompute is True:
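The same consideration applies in `_pre_fit`: `precompute` may be the string `'auto'`, a bool, or a precomputed Gram matrix, and only the string should reach the equality test. A simplified sketch of the dispatch (not the actual `_pre_fit` body; the helper name is made up for illustration):

```python
import numpy as np
from sklearn.externals.six import string_types

def resolve_precompute(precompute, n_samples, n_features):
    # Only a genuine string is compared against 'auto'; bools and
    # Gram matrices fall through untouched.
    if isinstance(precompute, string_types) and precompute == 'auto':
        precompute = (n_samples > n_features)
    return precompute

print(resolve_precompute('auto', 100, 10))           # True
print(resolve_precompute(False, 100, 10))            # False
print(resolve_precompute(np.eye(3), 100, 10).shape)  # (3, 3): passes through
```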
sklearn/linear_model/coordinate_descent.py (2 additions, 1 deletion)

@@ -644,7 +644,8 @@ def fit(self, X, y, check_input=True):
                           "well. You are advised to use the LinearRegression "
                           "estimator", stacklevel=2)

-        if self.precompute == 'auto':
+        if (isinstance(self.precompute, six.string_types)
+                and self.precompute == 'auto'):
             warnings.warn("Setting precompute to 'auto', was found to be "
                           "slower even when n_samples > n_features. Hence "
                           "it will be removed in 0.18.",
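Note that the warning this hunk guards should fire only for the string `'auto'`, never for a Gram matrix passed as `precompute`. A sketch of how to observe the difference, assuming scikit-learn 0.17-era behaviour where `precompute='auto'` is deprecated in `Lasso` but still accepted (later releases removed it entirely):

```python
import warnings
import numpy as np
from sklearn.linear_model import Lasso

rng = np.random.RandomState(0)
X = rng.randn(20, 5)
y = X[:, 0] + 0.1 * rng.randn(20)

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    Lasso(precompute='auto').fit(X, y)          # deprecated string value: warns
    Lasso(precompute=np.dot(X.T, X)).fit(X, y)  # explicit Gram matrix: no warning

print([str(w.message) for w in caught])
```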
sklearn/linear_model/least_angle.py (2 additions, 1 deletion)

@@ -26,6 +26,7 @@
 from ..utils import ConvergenceWarning
 from ..externals.joblib import Parallel, delayed
 from ..externals.six.moves import xrange
+from ..externals.six import string_types

 import scipy
 solve_triangular_args = {}
@@ -179,7 +180,7 @@ def lars_path(X, y, Xy=None, Gram=None, max_iter=500,
             # speeds up the calculation of the (partial) Gram matrix
             # and allows to easily swap columns
             X = X.copy('F')
-    elif Gram == 'auto':
+    elif isinstance(Gram, string_types) and Gram == 'auto':
         Gram = None
         if X.shape[0] > X.shape[1]:
             Gram = np.dot(X.T, X)
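`Gram` in `lars_path` follows the same multi-typed convention: `None`, the string `'auto'`, or an explicit Gram matrix. A hedged usage sketch (signatures as in this era of scikit-learn; both calls should yield the same path):

```python
import numpy as np
from sklearn.linear_model import lars_path

rng = np.random.RandomState(42)
X = rng.randn(50, 10)
y = X[:, 0] + 0.01 * rng.randn(50)

# Let lars_path decide: with n_samples > n_features, 'auto' precomputes X.T X.
alphas, active, coefs = lars_path(X, y, Gram='auto')

# Or pass the Gram matrix explicitly, skipping the string branch entirely.
alphas2, active2, coefs2 = lars_path(X, y, Gram=np.dot(X.T, X))
print(np.allclose(coefs, coefs2))
```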
sklearn/metrics/regression.py (28 additions, 24 deletions)

@@ -26,6 +26,7 @@

 from ..utils.validation import check_array, check_consistent_length
 from ..utils.validation import column_or_1d
+from ..externals.six import string_types

 import warnings

@@ -162,11 +163,12 @@ def mean_absolute_error(y_true, y_pred,
         y_true, y_pred, multioutput)
     output_errors = np.average(np.abs(y_pred - y_true),
                                weights=sample_weight, axis=0)
-    if multioutput == 'raw_values':
-        return output_errors
-    elif multioutput == 'uniform_average':
-        # pass None as weights to np.average: uniform mean
-        multioutput = None
+    if isinstance(multioutput, string_types):
+        if multioutput == 'raw_values':
+            return output_errors
+        elif multioutput == 'uniform_average':
+            # pass None as weights to np.average: uniform mean
+            multioutput = None

     return np.average(output_errors, weights=multioutput)

@@ -229,11 +231,12 @@ def mean_squared_error(y_true, y_pred,
         y_true, y_pred, multioutput)
     output_errors = np.average((y_true - y_pred) ** 2, axis=0,
                                weights=sample_weight)
-    if multioutput == 'raw_values':
-        return output_errors
-    elif multioutput == 'uniform_average':
-        # pass None as weights to np.average: uniform mean
-        multioutput = None
+    if isinstance(multioutput, string_types):
+        if multioutput == 'raw_values':
+            return output_errors
+        elif multioutput == 'uniform_average':
+            # pass None as weights to np.average: uniform mean
+            multioutput = None

     return np.average(output_errors, weights=multioutput)

@@ -464,20 +467,21 @@ def r2_score(y_true, y_pred,
                       "to 'uniform_average' in 0.18.",
                       DeprecationWarning)
         multioutput = 'variance_weighted'
-    if multioutput == 'raw_values':
-        # return scores individually
-        return output_scores
-    elif multioutput == 'uniform_average':
-        # passing None as weights results is uniform mean
-        avg_weights = None
-    elif multioutput == 'variance_weighted':
-        avg_weights = denominator
-        # avoid fail on constant y or one-element arrays
-        if not np.any(nonzero_denominator):
-            if not np.any(nonzero_numerator):
-                return 1.0
-            else:
-                return 0.0
+    if isinstance(multioutput, string_types):
+        if multioutput == 'raw_values':
+            # return scores individually
+            return output_scores
+        elif multioutput == 'uniform_average':
+            # passing None as weights results is uniform mean
+            avg_weights = None
+        elif multioutput == 'variance_weighted':
+            avg_weights = denominator
+            # avoid fail on constant y or one-element arrays
+            if not np.any(nonzero_denominator):
+                if not np.any(nonzero_numerator):
+                    return 1.0
+                else:
+                    return 0.0
     else:
         avg_weights = multioutput
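All three changed metrics share the same `multioutput` contract, and with the guard an explicit weight array now bypasses the string comparisons cleanly. A short usage sketch:

```python
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

y_true = np.array([[0.5, 1.0], [-1.0, 1.0], [7.0, -6.0]])
y_pred = np.array([[0.0, 2.0], [-1.0, 2.0], [8.0, -5.0]])

print(mean_absolute_error(y_true, y_pred, multioutput='raw_values'))      # per-output errors
print(mean_squared_error(y_true, y_pred, multioutput='uniform_average'))  # scalar mean
print(r2_score(y_true, y_pred, multioutput='variance_weighted'))          # variance-weighted scalar
# An ndarray of weights is no longer compared against the string options:
print(mean_absolute_error(y_true, y_pred, multioutput=np.array([0.3, 0.7])))
```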
sklearn/mixture/tests/test_gmm.py (0 additions, 1 deletion)

@@ -54,7 +54,6 @@ def test_sample_gaussian():
     from sklearn.mixture import sample_gaussian
     x = sample_gaussian([0, 0], [[4, 3], [1, .1]],
                         covariance_type='full', random_state=42)
-    print(x)
     assert_true(np.isfinite(x).all())
sklearn/preprocessing/data.py (1 addition, 1 deletion)

@@ -1553,7 +1553,7 @@ def _transform_selected(X, transform, selected="all", copy=True):
     -------
     X : array or sparse matrix, shape=(n_samples, n_features_new)
     """
-    if selected == "all":
+    if isinstance(selected, six.string_types) and selected == "all":
         return transform(X)

     X = check_array(X, accept_sparse='csc', copy=copy, dtype=FLOAT_DTYPES)
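For context, `_transform_selected` applies `transform` either to the whole matrix (`selected="all"`) or only to the listed columns, stacking the untouched columns after the transformed ones. A rough standalone sketch of the idea (plain `str` instead of the vendored `six`, dense input only; the real helper also handles sparse matrices):

```python
import numpy as np

def transform_selected_sketch(X, transform, selected="all"):
    # "all" means: transform every column.
    if isinstance(selected, str) and selected == "all":
        return transform(X)
    # Otherwise transform only the selected columns; the transformed
    # block comes first, followed by the untouched columns.
    mask = np.zeros(X.shape[1], dtype=bool)
    mask[np.asarray(selected)] = True
    return np.hstack([transform(X[:, mask]), X[:, ~mask]])

X = np.arange(12.0).reshape(3, 4)
print(transform_selected_sketch(X, lambda Z: Z * 10, selected=[0, 2]))
```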
sklearn/utils/estimator_checks.py (4 additions, 4 deletions)

@@ -38,7 +38,7 @@
 from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
 from sklearn.random_projection import BaseRandomProjection
 from sklearn.feature_selection import SelectKBest
-from sklearn.svm.base import BaseLibSVM, BaseSVC
+from sklearn.svm.base import BaseLibSVM
 from sklearn.pipeline import make_pipeline
 from sklearn.decomposition import NMF, ProjectedGradientNMF
 from sklearn.utils.validation import DataConversionWarning
@@ -479,7 +479,7 @@ def check_fit1d_1sample(name, Estimator):

     try:
         estimator.fit(X, y)
-    except ValueError :
+    except ValueError:
         pass


@@ -1158,7 +1158,6 @@ def check_regressors_train(name, Regressor):
     # and furthermore assumes the presence of outliers, hence
     # skipped
     if name not in ('PLSCanonical', 'CCA', 'RANSACRegressor'):
-        print(regressor)
         assert_greater(regressor.score(X, y_), 0.5)


@@ -1501,11 +1500,12 @@ def fit(self, X, y):
     assert_true(all(item in deep_params.items() for item in
                     shallow_params.items()))

+
 def check_classifiers_regression_target(name, Estimator):
     # Check if classifier throws an exception when fed regression targets

     boston = load_boston()
     X, y = boston.data, boston.target
     e = Estimator()
     msg = 'Unknown label type: '
-    assert_raises_regex(ValueError, msg, e.fit, X, y)
\ No newline at end of file
+    assert_raises_regex(ValueError, msg, e.fit, X, y)
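The new check asserts that classifiers reject continuous targets with an "Unknown label type" error. The behaviour it encodes can be reproduced directly with any target-validating classifier (a sketch using era-appropriate APIs; `load_boston` and the exact error message are assumptions tied to that era):

```python
from sklearn.datasets import load_boston
from sklearn.tree import DecisionTreeClassifier

boston = load_boston()
X, y = boston.data, boston.target  # y holds continuous house prices

try:
    DecisionTreeClassifier().fit(X, y)
except ValueError as exc:
    print(exc)  # expected to start with 'Unknown label type: '
```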