scikit-learn
diff --git a/sklearn/dummy.py b/sklearn/dummy.py
Lines changed: 1 addition & 1 deletion
diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py
Lines changed: 57 additions & 7 deletions
diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
Lines changed: 50 additions & 7 deletions
@@ -435,7 +435,7 @@ def fit(self, X, y, sample_weight=None):
 
             self.constant = check_array(self.constant,
                                         accept_sparse=['csr', 'csc', 'coo'],
-                                        ensure_2d=False)
+                                        ensure_2d=False, ensure_min_samples=0)
 
             if self.output_2d_ and self.constant.shape[0] != y.shape[1]:
                 raise ValueError(
 
@@ -8,6 +8,7 @@
 from itertools import product
 
 from sklearn.utils import as_float_array, check_array, check_symmetric
+from sklearn.utils import check_X_y
 
 from sklearn.utils.estimator_checks import NotAnArray
 
@@ -19,12 +20,12 @@
 from sklearn.svm import SVR
 
 from sklearn.datasets import make_blobs
-from sklearn.utils import as_float_array, check_array
-from sklearn.utils.estimator_checks import NotAnArray
 from sklearn.utils.validation import (
-        NotFittedError,
-        has_fit_parameter,
-        check_is_fitted)
+    NotFittedError,
+    has_fit_parameter,
+    check_is_fitted)
+
+from sklearn.utils.testing import assert_raise_message
 
 
 def test_as_float_array():
@@ -177,7 +178,7 @@ def test_check_array():
     Xs = [X_csc, X_coo, X_dok, X_int, X_float]
     accept_sparses = [['csr', 'coo'], ['coo', 'dok']]
     for X, dtype, accept_sparse, copy in product(Xs, dtypes, accept_sparses,
-                                                  copys):
+                                                 copys):
         X_checked = check_array(X, dtype=dtype, accept_sparse=accept_sparse,
                                 copy=copy)
         if dtype is not None:
@@ -210,6 +211,55 @@ def test_check_array():
     assert_true(isinstance(result, np.ndarray))
 
 
+def test_check_array_min_samples_and_features_messages():
+    # empty list is considered 2D by default:
+    msg = "0 feature(s) (shape=(1, 0)) while a minimum of 1 is required."
+    assert_raise_message(ValueError, msg, check_array, [])
+
+    # If considered a 1D collection when ensure_2d=False, then the minimum
+    # number of samples will break:
+    msg = "0 sample(s) (shape=(0,)) while a minimum of 1 is required."
+    assert_raise_message(ValueError, msg, check_array, [], ensure_2d=False)
+
+    # Invalid edge case when checking the default minimum sample of a scalar
+    msg = "Singleton array array(42) cannot be considered a valid collection."
+    assert_raise_message(TypeError, msg, check_array, 42, ensure_2d=False)
+
+    # But this works if the input data is forced to look like a 2d array with
+    # one sample and one feature:
+    X_checked = check_array(42, ensure_2d=True)
+    assert_array_equal(np.array([[42]]), X_checked)
+
+    # Simulate a model that would need at least 2 samples to be well defined
+    X = np.ones((1, 10))
+    y = np.ones(1)
+    msg = "1 sample(s) (shape=(1, 10)) while a minimum of 2 is required."
+    assert_raise_message(ValueError, msg, check_X_y, X, y,
+                         ensure_min_samples=2)
+
+    # Simulate a model that would require at least 3 features (e.g. SelectKBest
+    # with k=3)
+    X = np.ones((10, 2))
+    y = np.ones(2)
+    msg = "2 feature(s) (shape=(10, 2)) while a minimum of 3 is required."
+    assert_raise_message(ValueError, msg, check_X_y, X, y,
+                         ensure_min_features=3)
+
+    # Simulate a case where a pipeline stage has trimmed all the features of a
+    # 2D dataset.
+    X = np.empty(0).reshape(10, 0)
+    y = np.ones(10)
+    msg = "0 feature(s) (shape=(10, 0)) while a minimum of 1 is required."
+    assert_raise_message(ValueError, msg, check_X_y, X, y)
+
+    # nd-data is not checked for any minimum number of features by default:
+    X = np.ones((10, 0, 28, 28))
+    y = np.ones(10)
+    X_checked, y_checked = check_X_y(X, y, allow_nd=True)
+    assert_array_equal(X, X_checked)
+    assert_array_equal(y, y_checked)
+
+
 def test_has_fit_parameter():
     assert_false(has_fit_parameter(KNeighborsClassifier, "sample_weight"))
     assert_true(has_fit_parameter(RandomForestRegressor, "sample_weight"))
@@ -274,6 +324,6 @@ def test_check_is_fitted():
 
     ard.fit(*make_blobs())
     svr.fit(*make_blobs())
- 
+
     assert_equal(None, check_is_fitted(ard, "coef_"))
     assert_equal(None, check_is_fitted(svr, "support_"))
@@ -110,7 +110,13 @@ def _num_samples(x):
             x = np.asarray(x)
         else:
             raise TypeError("Expected sequence or array-like, got %r" % x)
-    return x.shape[0] if hasattr(x, 'shape') else len(x)
+    if hasattr(x, 'shape'):
+        if len(x.shape) == 0:
+            raise TypeError("Singleton array %r cannot be considered"
+                            " a valid collection." % x)
+        return x.shape[0]
+    else:
+        return len(x)
 
 
 def check_consistent_length(*arrays):
@@ -222,10 +228,11 @@ def _ensure_sparse_format(spmatrix, accept_sparse, dtype, order, copy,
 
 
 def check_array(array, accept_sparse=None, dtype=None, order=None, copy=False,
-                force_all_finite=True, ensure_2d=True, allow_nd=False):
+                force_all_finite=True, ensure_2d=True, allow_nd=False,
+                ensure_min_samples=1, ensure_min_features=1):
     """Input validation on an array, list, sparse matrix or similar.
 
-    By default, the input is converted to an at least 2nd numpy array.
+    By default, the input is converted to an at least 2d numpy array.
 
     Parameters
     ----------
@@ -257,6 +264,16 @@ def check_array(array, accept_sparse=None, dtype=None, order=None, copy=False,
     allow_nd : boolean (default=False)
         Whether to allow X.ndim > 2.
 
+    ensure_min_samples : int (default=1)
+        Make sure that the array has a minimum number of samples in its first
+        axis (rows for a 2D array). Setting to 0 disables this check.
+
+    ensure_min_features : int (default=1)
+        Make sure that the 2D array has some minimum number of features
+        (columns). The default value of 1 rejects empty datasets.
+        This check is only enforced when ``ensure_2d`` is True and
+        ``allow_nd`` is False. Setting to 0 disables this check.
+
     Returns
     -------
     X_converted : object
@@ -278,12 +295,26 @@ def check_array(array, accept_sparse=None, dtype=None, order=None, copy=False,
         if force_all_finite:
             _assert_all_finite(array)
 
+    if ensure_min_samples > 0:
+        n_samples = _num_samples(array)
+        if n_samples < ensure_min_samples:
+            raise ValueError("Found array with %d sample(s) (shape=%r) while a"
+                             " minimum of %d is required."
+                             % (n_samples, array.shape, ensure_min_samples))
+
+    if ensure_min_features > 0 and ensure_2d and not allow_nd:
+        n_features = array.shape[1]
+        if n_features < ensure_min_features:
+            raise ValueError("Found array with %d feature(s) (shape=%r) while"
+                             " a minimum of %d is required."
+                             % (n_features, array.shape, ensure_min_features))
     return array
 
 
 def check_X_y(X, y, accept_sparse=None, dtype=None, order=None, copy=False,
               force_all_finite=True, ensure_2d=True, allow_nd=False,
-              multi_output=False):
+              multi_output=False, ensure_min_samples=1,
+              ensure_min_features=1):
     """Input validation for standard estimators.
 
     Checks X and y for consistent length, enforces X 2d and y 1d.
@@ -327,13 +358,24 @@ def check_X_y(X, y, accept_sparse=None, dtype=None, order=None, copy=False,
         Whether to allow 2-d y (array or sparse matrix). If false, y will be
         validated as a vector.
 
+    ensure_min_samples : int (default=1)
+        Make sure that X has a minimum number of samples in its first
+        axis (rows for a 2D array).
+
+    ensure_min_features : int (default=1)
+        Make sure that the 2D X has some minimum number of features
+        (columns). The default value of 1 rejects empty datasets.
+        This check is only enforced when ``ensure_2d`` is True and
+        ``allow_nd`` is False.
+
     Returns
     -------
     X_converted : object
         The converted and validated X.
     """
     X = check_array(X, accept_sparse, dtype, order, copy, force_all_finite,
-                    ensure_2d, allow_nd)
+                    ensure_2d, allow_nd, ensure_min_samples,
+                    ensure_min_features)
     if multi_output:
         y = check_array(y, 'csr', force_all_finite=True, ensure_2d=False)
     else:
@@ -353,7 +395,7 @@ def column_or_1d(y, warn=False):
     y : array-like
 
     warn : boolean, default False
-       To control display of warnings. 
+       To control display of warnings.
 
     Returns
     -------
@@ -406,6 +448,7 @@ def check_random_state(seed):
     raise ValueError('%r cannot be used to seed a numpy.random.RandomState'
                      ' instance' % seed)
 
+
 def has_fit_parameter(estimator, parameter):
     """Checks whether the estimator's fit method supports the given parameter.
 
@@ -512,4 +555,4 @@ def check_is_fitted(estimator, attributes, msg=None, all_or_any=all):
         attributes = [attributes]
 
     if not all_or_any([hasattr(estimator, attr) for attr in attributes]):
-        raise NotFittedError(msg % {'name' : type(estimator).__name__})
+        raise NotFittedError(msg % {'name': type(estimator).__name__})