Merge pull request #2566 from Manoj-Kumar-S/constant-output · r2k0/scikit-learn@bf033bf · GitHub

Commit bf033bf

Merge pull request scikit-learn#2566 from Manoj-Kumar-S/constant-output
Constant output dummy classifier.
2 parents 623dfef + 23331a9 · commit bf033bf

3 files changed: +92 −4 lines

doc/modules/model_evaluation.rst

Lines changed: 4 additions & 1 deletion
@@ -1073,6 +1073,9 @@ implements three such simple strategies for classification:
   set's class distribution,
 - `most_frequent` always predicts the most frequent label in the training set,
 - `uniform` generates predictions uniformly at random.
+- `constant` always predicts a constant label that is provided by the user.
+  A major motivation of this method is F1-scoring when the positive class
+  is in the minority.
 
 Note that with all these strategies, the `predict` method completely ignores
 the input data!

@@ -1096,7 +1099,7 @@ Next, let's compare the accuracy of `SVC` and `most_frequent`::
     0.63...
     >>> clf = DummyClassifier(strategy='most_frequent',random_state=0)
     >>> clf.fit(X_train, y_train)
-    DummyClassifier(random_state=0, strategy='most_frequent')
+    DummyClassifier(constant=None, random_state=0, strategy='most_frequent')
     >>> clf.score(X_test, y_test) # doctest: +ELLIPSIS
     0.57...
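
The documentation above motivates the `constant` strategy with F1-scoring on a
minority positive class. A minimal, hypothetical sketch of that use case (the
imbalanced toy data is illustrative, not part of the diff):

    import numpy as np
    from sklearn.dummy import DummyClassifier
    from sklearn.metrics import f1_score

    # Imbalanced toy problem: the positive class (1) is 10% of the data.
    X = np.zeros((100, 1))  # features are ignored by DummyClassifier
    y = np.array([0] * 90 + [1] * 10)

    # Baseline that always predicts the positive class.
    clf = DummyClassifier(strategy='constant', constant=1)
    clf.fit(X, y)
    print(f1_score(y, clf.predict(X)))  # precision 0.1, recall 1.0 -> F1 ~ 0.18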

sklearn/dummy.py

Lines changed: 43 additions & 3 deletions
@@ -28,10 +28,17 @@ class DummyClassifier(BaseEstimator, ClassifierMixin):
     * "most_frequent": always predicts the most frequent label in the
       training set.
     * "uniform": generates predictions uniformly at random.
+    * "constant": always predicts a constant label that is provided by
+      the user. This is useful for metrics that evaluate a non-majority
+      class.
 
     random_state: int seed, RandomState instance, or None (default)
         The seed of the pseudo random number generator to use.
 
+    constant: int or str or array of shape = [n_outputs]
+        The explicit constant as predicted by the "constant" strategy. This
+        parameter is useful only for the "constant" strategy.
+
     Attributes
     ----------
     `classes_` : array or list of array of shape = [n_classes]

@@ -48,11 +55,14 @@ class DummyClassifier(BaseEstimator, ClassifierMixin):
 
     `outputs_2d_` : bool,
         True if the output at fit is 2d, else false.
+
     """
 
-    def __init__(self, strategy="stratified", random_state=None):
+    def __init__(self, strategy="stratified", random_state=None,
+                 constant=None):
         self.strategy = strategy
         self.random_state = random_state
+        self.constant = constant
 
     def fit(self, X, y):
         """Fit the random classifier.

@@ -71,7 +81,8 @@ def fit(self, X, y):
         self : object
             Returns self.
         """
-        if self.strategy not in ("most_frequent", "stratified", "uniform"):
+        if self.strategy not in ("most_frequent", "stratified", "uniform",
+                                 "constant"):
             raise ValueError("Unknown strategy type.")
 
         y = np.atleast_1d(y)
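
With the new `constant` keyword in `__init__` above, `constant=None` now
appears in the estimator's repr, which is exactly why the doctest output in
model_evaluation.rst changes in the first file. A quick illustration:

    from sklearn.dummy import DummyClassifier

    clf = DummyClassifier(strategy='most_frequent', random_state=0)
    print(clf)
    # DummyClassifier(constant=None, random_state=0, strategy='most_frequent')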
@@ -85,12 +96,29 @@ def fit(self, X, y):
         self.n_classes_ = []
         self.class_prior_ = []
 
+        if self.strategy == "constant":
+            if self.constant is None:
+                raise ValueError("Constant target value has to be specified "
+                                 "when the constant strategy is used.")
+            else:
+                constant = np.reshape(np.atleast_1d(self.constant), (-1, 1))
+                if constant.shape[0] != self.n_outputs_:
+                    raise ValueError("Constant target value should have "
+                                     "shape (%d, 1)." % self.n_outputs_)
+
         for k in xrange(self.n_outputs_):
             classes, y_k = unique(y[:, k], return_inverse=True)
             self.classes_.append(classes)
             self.n_classes_.append(classes.shape[0])
             self.class_prior_.append(np.bincount(y_k) / float(y_k.shape[0]))
 
+            # Checking in case of constant strategy if the constant provided
+            # by the user is in y.
+            if self.strategy == "constant":
+                if constant[k] not in self.classes_[k]:
+                    raise ValueError("The constant target value must be "
+                                     "present in training data")
+
         if self.n_outputs_ == 1 and not self.output_2d_:
             self.n_classes_ = self.n_classes_[0]
             self.classes_ = self.classes_[0]
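
The fit-time validation reshapes whatever the user passed into an
(n_outputs, 1) column, so scalar, string, and list constants are handled
uniformly. A standalone sketch of just that step:

    import numpy as np

    for value in (1, 'one', [1, 0]):
        constant = np.reshape(np.atleast_1d(value), (-1, 1))
        print(constant.shape)
    # (1, 1), (1, 1), (2, 1): fit() then compares constant.shape[0]
    # against self.n_outputs_ and raises ValueError on a mismatch.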
@@ -123,12 +151,13 @@ def predict(self, X):
         n_classes_ = self.n_classes_
         classes_ = self.classes_
         class_prior_ = self.class_prior_
+        constant = self.constant
         if self.n_outputs_ == 1:
             # Get same type even for self.n_outputs_ == 1
             n_classes_ = [n_classes_]
             classes_ = [classes_]
             class_prior_ = [class_prior_]
-
+            constant = [constant]
         # Compute probability only once
         if self.strategy == "stratified":
             proba = self.predict_proba(X)

@@ -146,6 +175,10 @@ def predict(self, X):
             elif self.strategy == "uniform":
                 ret = rs.randint(n_classes_[k], size=n_samples)
 
+            elif self.strategy == "constant":
+                ret = np.ones(n_samples, dtype=int) * (
+                    np.where(classes_[k] == constant[k]))
+
             y.append(classes_[k][ret])
 
         y = np.vstack(y).T
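
The constant branch of `predict` finds the index of the requested label in
`classes_[k]` via `np.where` and fills an index array with it; `classes_[k][ret]`
then maps indices back to labels, mirroring the other strategies. The same
trick on toy arrays (values assumed for illustration):

    import numpy as np

    classes = np.array([1, 2])  # sorted unique labels for output k
    constant = 1                # the user-requested label
    ind = np.where(classes == constant)   # (array([0]),): position of the label
    ret = np.ones(4, dtype=int) * ind     # broadcasts to [[0, 0, 0, 0]]
    print(classes[ret])                   # [[1 1 1 1]]: every sample gets label 1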
@@ -181,11 +214,13 @@ def predict_proba(self, X):
         n_classes_ = self.n_classes_
         classes_ = self.classes_
         class_prior_ = self.class_prior_
+        constant = self.constant
         if self.n_outputs_ == 1 and not self.output_2d_:
             # Get same type even for self.n_outputs_ == 1
             n_classes_ = [n_classes_]
             classes_ = [classes_]
             class_prior_ = [class_prior_]
+            constant = [constant]
 
         P = []
         for k in xrange(self.n_outputs_):

@@ -201,6 +236,11 @@ def predict_proba(self, X):
                 out = np.ones((n_samples, n_classes_[k]), dtype=np.float64)
                 out /= n_classes_[k]
 
+            elif self.strategy == "constant":
+                ind = np.where(classes_[k] == constant[k])
+                out = np.zeros((n_samples, n_classes_[k]), dtype=np.float64)
+                out[:, ind] = 1.0
+
             P.append(out)
 
         if self.n_outputs_ == 1 and not self.output_2d_:
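
For `predict_proba`, the constant strategy yields a degenerate distribution:
all probability mass in the constant label's column. The same logic on toy
values (note: this sketch takes `np.where(...)[0]`, since indexing with the
raw tuple as the committed code does is deprecated in newer NumPy):

    import numpy as np

    classes = np.array([1, 2])
    constant = 1
    n_samples = 4

    ind = np.where(classes == constant)[0]  # column of the constant label
    out = np.zeros((n_samples, len(classes)), dtype=np.float64)
    out[:, ind] = 1.0
    print(out)
    # [[1. 0.]
    #  [1. 0.]
    #  [1. 0.]
    #  [1. 0.]]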

sklearn/tests/test_dummy.py

Lines changed: 45 additions & 0 deletions
@@ -208,3 +208,48 @@ def test_multioutput_regressor():
 def test_regressor_exceptions():
     reg = DummyRegressor()
     assert_raises(ValueError, reg.predict, [])
+
+
+def test_constant_strategy():
+    X = [[0], [0], [0], [0]]  # ignored
+    y = [2, 1, 2, 2]
+
+    clf = DummyClassifier(strategy="constant", random_state=0, constant=1)
+    clf.fit(X, y)
+    assert_array_equal(clf.predict(X), np.ones(len(X)))
+    _check_predict_proba(clf, X, y)
+
+    X = [[0], [0], [0], [0]]  # ignored
+    y = ['two', 'one', 'two', 'two']
+    clf = DummyClassifier(strategy="constant", random_state=0, constant='one')
+    clf.fit(X, y)
+    assert_array_equal(clf.predict(X), np.array(['one'] * 4))
+    _check_predict_proba(clf, X, y)
+
+
+def test_constant_strategy_multioutput():
+    X = [[0], [0], [0], [0]]  # ignored
+    y = np.array([[2, 3],
+                  [1, 3],
+                  [2, 3],
+                  [2, 0]])
+
+    n_samples = len(X)
+
+    clf = DummyClassifier(strategy="constant", random_state=0,
+                          constant=[1, 0])
+    clf.fit(X, y)
+    assert_array_equal(clf.predict(X),
+                       np.hstack([np.ones((n_samples, 1)),
+                                  np.zeros((n_samples, 1))]))
+    _check_predict_proba(clf, X, y)
+
+
+def test_constant_strategy_exceptions():
+    X = [[0], [0], [0], [0]]  # ignored
+    y = [2, 1, 2, 2]
+    clf = DummyClassifier(strategy="constant", random_state=0)
+    assert_raises(ValueError, clf.fit, X, y)
+    clf = DummyClassifier(strategy="constant", random_state=0,
+                          constant=[2, 0])
+    assert_raises(ValueError, clf.fit, X, y)
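
The two failure modes exercised by `test_constant_strategy_exceptions` can be
reproduced directly; this minimal sketch prints the error messages added in
sklearn/dummy.py above:

    from sklearn.dummy import DummyClassifier

    X, y = [[0], [0], [0], [0]], [2, 1, 2, 2]

    # No constant supplied with the "constant" strategy.
    try:
        DummyClassifier(strategy="constant").fit(X, y)
    except ValueError as e:
        print(e)  # Constant target value has to be specified ...

    # Constant of the wrong shape for single-output y.
    try:
        DummyClassifier(strategy="constant", constant=[2, 0]).fit(X, y)
    except ValueError as e:
        print(e)  # Constant target value should have shape (1, 1).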
