ENH test more shapes, test non-consecutive classes, test accuracy on test set · pfdevilliers/scikit-learn@a93e0af · GitHub
Commit a93e0af

ENH test more shapes, test non-consecutive classes, test accuracy on test set
1 parent 971d131 commit a93e0af


sklearn/tests/test_common.py

Lines changed: 70 additions & 13 deletions
@@ -3,15 +3,15 @@
 """
 import warnings
 import numpy as np
-from nose.tools import assert_raises
+from nose.tools import assert_raises, assert_equal
 from numpy.testing import assert_array_equal
 
 from sklearn.utils.testing import all_estimators
 from sklearn.utils.testing import assert_greater
 from sklearn.base import clone, ClassifierMixin, RegressorMixin
 from sklearn.utils import shuffle
 from sklearn.preprocessing import Scaler
-#from sklearn.datasets import load_digits
+from sklearn.cross_validation import train_test_split
 from sklearn.datasets import load_iris, load_boston
 from sklearn.metrics import zero_one_score
 from sklearn.lda import LDA
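
The import swap above drops the commented-out digits loader in favor of train_test_split, which the new held-out-set test later in this diff depends on. A minimal usage sketch, assuming the old sklearn.cross_validation module path used in this file (later releases moved the helper to sklearn.model_selection):

import numpy as np
from sklearn.cross_validation import train_test_split  # old path, as imported above

X = np.arange(20).reshape(10, 2)  # 10 samples, 2 features
y = np.arange(10) % 2             # alternating binary labels
# by default (in most releases) a quarter of the samples are held out for testing
X_train, X_test, y_train, y_test = train_test_split(X, y)
print(X_train.shape, X_test.shape)  # e.g. (7, 2) (3, 2)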
@@ -26,7 +26,6 @@
         OutputCodeClassifier
 from sklearn.feature_selection import RFE, RFECV
 from sklearn.naive_bayes import MultinomialNB, BernoulliNB
-from sklearn.linear_model import RidgeClassifier, RidgeClassifierCV
 
 dont_test = [Pipeline, GridSearchCV, SparseCoder]
 meta_estimators = [BaseEnsemble, OneVsOneClassifier, OutputCodeClassifier,
@@ -55,15 +54,17 @@ def test_all_estimators():
         print(w)
 
 
-def test_classifiers():
+def test_classifiers_train():
+    # test if classifiers do something sensible on training set
+    # also test all shapes / shape errors
     estimators = all_estimators()
     classifiers = [(name, E) for name, E in estimators if issubclass(E,
         ClassifierMixin)]
     iris = load_iris()
     X, y = iris.data, iris.target
     X, y = shuffle(X, y, random_state=7)
-    #digits = load_digits()
-    #X, y = digits.data, digits.target
+    n_samples, n_features = X.shape
+    n_labels = len(np.unique(y))
     X = Scaler().fit_transform(X)
     for name, Clf in classifiers:
         if Clf in dont_test or Clf in meta_estimators:
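
The two new locals feed the shape assertions added below: predict should return one label per sample, while decision_function and predict_proba should return one column per class. For iris that means (150,) and (150, 3). A standalone numpy sketch of the same bookkeeping, with the dataset sizes hard-coded for illustration:

import numpy as np

X = np.zeros((150, 4))        # iris-sized design matrix
y = np.repeat([0, 1, 2], 50)  # three balanced classes
n_samples, n_features = X.shape
n_labels = len(np.unique(y))
assert (n_samples,) == (150,)             # expected predict() output shape
assert (n_samples, n_labels) == (150, 3)  # expected predict_proba() output shape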
@@ -75,6 +76,7 @@ def test_classifiers():
         # fit
         clf.fit(X, y)
         y_pred = clf.predict(X)
+        assert_equal(y_pred.shape, (n_samples,))
         # training set performance
         assert_greater(zero_one_score(y, y_pred), 0.78)
         # raises error on malformed input for predict
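
zero_one_score, used for the 0.78 floor above, is the old scikit-learn name for plain accuracy (later renamed accuracy_score); the threshold just guards against degenerate predictors on the easy iris problem. An equivalent check in plain numpy:

import numpy as np

def accuracy(y_true, y_pred):
    # fraction of exactly matching labels, i.e. what zero_one_score returns
    return np.mean(np.asarray(y_true) == np.asarray(y_pred))

y_true = np.array([0, 1, 2, 2, 1])
y_pred = np.array([0, 1, 2, 1, 1])
print(accuracy(y_true, y_pred))  # 0.8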
@@ -84,24 +86,82 @@
         assert_raises(ValueError, clf.predict, X.T)
         if hasattr(clf, "decision_function"):
             try:
-                #raises error on malformed input for decision_function
+                # raises error on malformed input for decision_function
                 assert_raises(ValueError, clf.decision_function, X.T)
-                #decision_function agrees with predict:
+                # decision_function agrees with predict:
                 decision = clf.decision_function(X)
+                assert_equal(decision.shape, (n_samples, n_labels))
                 assert_array_equal(np.argmax(decision, axis=1), y_pred)
             except NotImplementedError:
                 pass
         if hasattr(clf, "predict_proba"):
             try:
+                # raises error on malformed input for predict_proba
                 assert_raises(ValueError, clf.predict_proba, X.T)
-                # decision_function agrees with predict:
+                # predict_proba agrees with predict:
                 y_prob = clf.predict_proba(X)
+                assert_equal(y_prob.shape, (n_samples, n_labels))
                 assert_array_equal(np.argmax(y_prob, axis=1), y_pred)
             except NotImplementedError:
                 pass
 
 
-def test_regressors():
+def test_classifiers_classes():
+    # test if classifiers can cope with non-consecutive classes
+    estimators = all_estimators()
+    classifiers = [(name, E) for name, E in estimators if issubclass(E,
+        ClassifierMixin)]
+    iris = load_iris()
+    X, y = iris.data, iris.target
+    X, y = shuffle(X, y, random_state=7)
+    X = Scaler().fit_transform(X)
+    y = 2 * y + 1
+    # TODO: make work with next line :)
+    #y = y.astype(np.str)
+    for name, Clf in classifiers:
+        if Clf in dont_test or Clf in meta_estimators:
+            continue
+        if Clf in [MultinomialNB, BernoulliNB]:
+            # TODO also test these!
+            continue
+        clf = Clf()
+        # fit
+        clf.fit(X, y)
+        y_pred = clf.predict(X)
+        # training set performance
+        assert_array_equal(np.unique(y), np.unique(y_pred))
+        assert_greater(zero_one_score(y, y_pred), 0.78)
+
+
+def test_classifiers_test():
+    # test classifier accuracy on a held-out test set
+    estimators = all_estimators()
+    classifiers = [(name, E) for name, E in estimators if issubclass(E,
+        ClassifierMixin)]
+    iris = load_iris()
+    X, y = iris.data, iris.target
+    X, y = shuffle(X, y, random_state=7)
+    X = Scaler().fit_transform(X)
+    X_train, X_test, y_train, y_test = train_test_split(X, y)
+    for name, Clf in classifiers:
+        if Clf in dont_test or Clf in meta_estimators:
+            continue
+        if Clf in [MultinomialNB, BernoulliNB]:
+            # TODO also test these!
+            continue
+        clf = Clf()
+        # fit
+        try:
+            clf.fit(X_train, y_train)
+            y_pred = clf.predict(X_test)
+            # test set performance
+            assert_greater(zero_one_score(y_test, y_pred), 0.78)
+        except Exception as ex:
+            print(ex)
+            print(clf)
+
+
+def test_regressors_train():
     estimators = all_estimators()
     regressors = [(name, E) for name, E in estimators if issubclass(E,
         RegressorMixin)]
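
The y = 2 * y + 1 line in test_classifiers_classes above remaps iris's consecutive labels {0, 1, 2} to the non-consecutive set {1, 3, 5}; the assert_array_equal on np.unique then verifies that classifiers predict from the original label set instead of silently re-indexing to 0..n_classes-1. A pure-numpy illustration of the remapping:

import numpy as np

y = np.repeat([0, 1, 2], 50)  # consecutive iris-style labels
y = 2 * y + 1                 # {0, 1, 2} -> {1, 3, 5}
print(np.unique(y))           # [1 3 5]
# a well-behaved classifier must emit predictions drawn from this set,
# which is what assert_array_equal(np.unique(y), np.unique(y_pred)) checks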
@@ -115,9 +175,6 @@ def test_regressors():
     for name, Reg in regressors:
         if Reg in dont_test or Reg in meta_estimators:
             continue
-        if Reg in [RidgeClassifier, RidgeClassifierCV]:
-            #TODO this is not a regressor!
-            continue
         reg = Reg()
         if hasattr(reg, 'alpha'):
             reg.set_params(alpha=0.01)
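
The deleted skip (and the matching import removed in the second hunk) appears to be obsolete because RidgeClassifier and RidgeClassifierCV derive from ClassifierMixin rather than RegressorMixin, so the issubclass(E, RegressorMixin) filter above never selects them and they are exercised by the classifier tests instead. A quick sanity check of that assumption against current scikit-learn:

from sklearn.base import ClassifierMixin, RegressorMixin
from sklearn.linear_model import RidgeClassifier, RidgeClassifierCV

for cls in (RidgeClassifier, RidgeClassifierCV):
    print(cls.__name__,
          issubclass(cls, ClassifierMixin),  # expected: True
          issubclass(cls, RegressorMixin))   # expected: False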
