rmurcek
diff --git a/‎doc/modules/model_evaluation.rst
Lines changed: 8 additions & 2 deletions b/‎doc/modules/model_evaluation.rst
Lines changed: 8 additions & 2 deletions
diff --git a/‎sklearn/dummy.py
Lines changed: 62 additions & 8 deletions b/‎sklearn/dummy.py
Lines changed: 62 additions & 8 deletions
diff --git a/‎sklearn/tests/test_dummy.py
Lines changed: 144 additions & 10 deletions b/‎sklearn/tests/test_dummy.py
Lines changed: 144 additions & 10 deletions
@@ -1146,5 +1146,11 @@ classification, it probably means that something went wrong: features are not
 helpful, a hyper parameter is not correctly tuned, the classifier is suffering
 from class imbalance, etc...
 
-:class:`DummyRegressor` implements a simple rule of thumb for regression:
-always predict the mean of the training targets.
+:class:`DummyRegressor` also implements three simple rules of thumb for regression:
+
+- `mean` always predicts the mean of the training targets.
+- `median` always predicts the median of the training targets.
+- `constant` always predicts a constant value that is provided by the user.
+
+In all these strategies, the `predict` method completely ignores
+the input data.
@@ -1,5 +1,6 @@
 # Author: Mathieu Blondel <mathieu@mblondel.org>
 #         Arnaud Joly <a.joly@ulg.ac.be>
+#         Maheshakya Wijewardena <maheshakya.10@cse.mrt.ac.lk>
 # License: BSD 3 clause
 
 import numpy as np
@@ -8,6 +9,7 @@
 from .externals.six.moves import xrange
 from .utils import check_random_state
 from .utils.validation import safe_asarray
+from sklearn.utils import deprecated
 
 
 class DummyClassifier(BaseEstimator, ClassifierMixin):
@@ -272,16 +274,30 @@ def predict_log_proba(self, X):
 
 class DummyRegressor(BaseEstimator, RegressorMixin):
     """
-    DummyRegressor is a regressor that always predicts the mean of the training
-    targets.
+    DummyRegressor is a regressor that makes predictions using
+    simple rules.
 
     This regressor is useful as a simple baseline to compare with other
     (real) regressors. Do not use it for real problems.
 
+    Parameters
+    ----------
+    strategy : str
+        Strategy to use to generate predictions.
+            * "mean": always predicts the mean of the training set
+            * "median": always predicts the median of the training set
+            * "constant": always predicts a constant value that is provided by
+              the user.
+
+    constant : int or float or array of shape = [n_outputs]
+        The explicit constant as predicted by the "constant" strategy. This
+        parameter is useful only for the "constant" strategy.
+
     Attributes
     ----------
-    `y_mean_` : float or array of shape [n_outputs]
-        Mean of the training targets.
+    `constant_` : float or array of shape [n_outputs]
+        Mean or median of the training targets or constant value given by
+        the user.
 
     `n_outputs_` : int,
         Number of outputs.
@@ -290,6 +306,17 @@ class DummyRegressor(BaseEstimator, RegressorMixin):
         True if the output at fit is 2d, else false.
     """
 
+    def __init__(self, strategy="mean", constant=None):
+        self.strategy = strategy
+        self.constant = constant
+
+    @property
+    @deprecated('This will be removed in version 0.17')
+    def y_mean_(self):
+        if self.strategy == 'mean':
+            return self.constant_
+        raise AttributeError
+
     def fit(self, X, y):
         """Fit the random regressor.
 
@@ -307,10 +334,36 @@ def fit(self, X, y):
         self : object
             Returns self.
         """
+
+        if self.strategy not in ("mean", "median", "constant"):
+            raise ValueError("Unknown strategy type: %s, "
+                             "expected 'mean', 'median' or 'constant'"
+                             % self.strategy)
+
         y = safe_asarray(y)
-        self.y_mean_ = np.reshape(np.mean(y, axis=0), (1, -1))
-        self.n_outputs_ = np.size(self.y_mean_)  # y.shape[1] is not safe
         self.output_2d_ = (y.ndim == 2)
+
+        if self.strategy == "mean":
+            self.constant_ = np.reshape(np.mean(y, axis=0), (1, -1))
+
+        elif self.strategy == "median":
+            self.constant_ = np.reshape(np.median(y, axis=0), (1, -1))
+
+        elif self.strategy == "constant":
+            if self.constant is None:
+                raise TypeError("Constant target value has to be specified "
+                                "when the constant strategy is used.")
+
+            self.constant = safe_asarray(self.constant)
+
+            if self.output_2d_ and self.constant.shape[0] != y.shape[1]:
+                raise ValueError(
+                    "Constant target value should have "
+                    "shape (%d, 1)." % y.shape[1])
+
+            self.constant_ = np.reshape(self.constant, (1, -1))
+
+        self.n_outputs_ = np.size(self.constant_)  # y.shape[1] is not safe
         return self
 
     def predict(self, X):
@@ -328,12 +381,13 @@ def predict(self, X):
         y : array, shape = [n_samples]  or [n_samples, n_outputs]
             Predicted target values for X.
         """
-        if not hasattr(self, "y_mean_"):
+        if not hasattr(self, "constant_"):
             raise ValueError("DummyRegressor not fitted.")
 
         X = safe_asarray(X)
         n_samples = X.shape[0]
-        y = np.ones((n_samples, 1)) * self.y_mean_
+
+        y = np.ones((n_samples, 1)) * self.constant_
 
         if self.n_outputs_ == 1 and not self.output_2d_:
             y = np.ravel(y)
 
@@ -59,6 +59,27 @@ def _check_behavior_2d(clf):
     assert_equal(y.shape, y_pred.shape)
 
 
+def _check_behavior_2d_for_constant(clf):
+    # 2d case only
+    X = np.array([[0], [0], [0], [0]])  # ignored
+    y = np.array([[1, 0, 5, 4, 3],
+                  [2, 0, 1, 2, 5],
+                  [1, 0, 4, 5, 2],
+                  [1, 3, 3, 2, 0]])
+    est = clone(clf)
+    est.fit(X, y)
+    y_pred = est.predict(X)
+    assert_equal(y.shape, y_pred.shape)
+
+
+def _check_equality_regressor(statistic, y_learn, y_pred_learn,
+                              y_test, y_pred_test):
+    assert_array_equal(np.tile(statistic, (y_learn.shape[0], 1)),
+                       y_pred_learn)
+    assert_array_equal(np.tile(statistic, (y_test.shape[0], 1)),
+                       y_pred_test)
+
+
 def test_most_frequent_strategy():
     X = [[0], [0], [0], [0]]  # ignored
     y = [1, 2, 1, 1]
@@ -175,33 +196,37 @@ def test_classifier_exceptions():
     assert_raises(ValueError, clf.predict_proba, [])
 
 
-def test_regressor():
+def test_mean_strategy_regressor():
+
+    random_state = np.random.RandomState(seed=1)
+
     X = [[0]] * 4  # ignored
-    y = [1, 2, 1, 1]
+    y = random_state.randn(4)
 
     reg = DummyRegressor()
     reg.fit(X, y)
-    assert_array_equal(reg.predict(X), [5. / 4] * len(X))
+    assert_array_equal(reg.predict(X), [np.mean(y)] * len(X))
 
 
-def test_multioutput_regressor():
+def test_mean_strategy_multioutput_regressor():
 
-    X_learn = np.random.randn(10, 10)
-    y_learn = np.random.randn(10, 5)
+    random_state = np.random.RandomState(seed=1)
+
+    X_learn = random_state.randn(10, 10)
+    y_learn = random_state.randn(10, 5)
 
     mean = np.mean(y_learn, axis=0).reshape((1, -1))
 
-    X_test = np.random.randn(20, 10)
-    y_test = np.random.randn(20, 5)
+    X_test = random_state.randn(20, 10)
+    y_test = random_state.randn(20, 5)
 
     # Correctness oracle
     est = DummyRegressor()
     est.fit(X_learn, y_learn)
     y_pred_learn = est.predict(X_learn)
     y_pred_test = est.predict(X_test)
 
-    assert_array_equal(np.tile(mean, (y_learn.shape[0], 1)), y_pred_learn)
-    assert_array_equal(np.tile(mean, (y_test.shape[0], 1)), y_pred_test)
+    _check_equality_regressor(mean, y_learn, y_pred_learn, y_test, y_pred_test)
     _check_behavior_2d(est)
 
 
@@ -210,6 +235,115 @@ def test_regressor_exceptions():
     assert_raises(ValueError, reg.predict, [])
 
 
+def test_median_strategy_regressor():
+
+    random_state = np.random.RandomState(seed=1)
+
+    X = [[0]] * 5  # ignored
+    y = random_state.randn(5)
+
+    reg = DummyRegressor(strategy="median")
+    reg.fit(X, y)
+    assert_array_equal(reg.predict(X), [np.median(y)] * len(X))
+
+
+def test_median_strategy_multioutput_regressor():
+
+    random_state = np.random.RandomState(seed=1)
+
+    X_learn = random_state.randn(10, 10)
+    y_learn = random_state.randn(10, 5)
+
+    median = np.median(y_learn, axis=0).reshape((1, -1))
+
+    X_test = random_state.randn(20, 10)
+    y_test = random_state.randn(20, 5)
+
+    # Correctness oracle
+    est = DummyRegressor(strategy="median")
+    est.fit(X_learn, y_learn)
+    y_pred_learn = est.predict(X_learn)
+    y_pred_test = est.predict(X_test)
+
+    _check_equality_regressor(
+        median, y_learn, y_pred_learn, y_test, y_pred_test)
+    _check_behavior_2d(est)
+
+
+def test_constant_strategy_regressor():
+
+    random_state = np.random.RandomState(seed=1)
+
+    X = [[0]] * 5  # ignored
+    y = random_state.randn(5)
+
+    reg = DummyRegressor(strategy="constant", constant=[43])
+    reg.fit(X, y)
+    assert_array_equal(reg.predict(X), [43] * len(X))
+
+    reg = DummyRegressor(strategy="constant", constant=43)
+    reg.fit(X, y)
+    assert_array_equal(reg.predict(X), [43] * len(X))
+
+
+def test_constant_strategy_multioutput_regressor():
+
+    random_state = np.random.RandomState(seed=1)
+
+    X_learn = random_state.randn(10, 10)
+    y_learn = random_state.randn(10, 5)
+
+    # test with 2d targets: one constant per output column
+    constants = random_state.randn(5)
+
+    X_test = random_state.randn(20, 10)
+    y_test = random_state.randn(20, 5)
+
+    # Correctness oracle
+    est = DummyRegressor(strategy="constant", constant=constants)
+    est.fit(X_learn, y_learn)
+    y_pred_learn = est.predict(X_learn)
+    y_pred_test = est.predict(X_test)
+
+    _check_equality_regressor(
+        constants, y_learn, y_pred_learn, y_test, y_pred_test)
+    _check_behavior_2d_for_constant(est)
+
+
+def test_y_mean_attribute_regressor():
+    X = [[0]] * 5
+    y = [1, 2, 4, 6, 8]
+    # when strategy = 'mean'
+    est = DummyRegressor(strategy='mean')
+    est.fit(X, y)
+    assert_equal(est.y_mean_, np.mean(y))
+
+
+def test_unknown_strategey_regressor():
+    X = [[0]] * 5
+    y = [1, 2, 4, 6, 8]
+
+    est = DummyRegressor(strategy='gona')
+    assert_raises(ValueError, est.fit, X, y)
+
+
+def test_constants_not_specified_regressor():
+    X = [[0]] * 5
+    y = [1, 2, 4, 6, 8]
+
+    est = DummyRegressor(strategy='constant')
+    assert_raises(TypeError, est.fit, X, y)
+
+
+def test_constant_size_multioutput_regressor():
+    random_state = np.random.RandomState(seed=1)
+    X = random_state.randn(10, 10)
+    y = random_state.randn(10, 5)
+
+    est = DummyRegressor(strategy='constant', constant=[1, 2, 3, 4])
+    assert_raises(ValueError, est.fit, X, y)
+
+
 def test_constant_strategy():
     X = [[0], [0], [0], [0]]  # ignored
     y = [2, 1, 2, 2]