FEA Add QuantileRegressor estimator (#9978) · murata-yu/scikit-learn@c1cc67d · GitHub

Commit c1cc67d

FEA Add QuantileRegressor estimator (scikit-learn#9978)

Authored by David Dale (avidale) and Christian Lorentzen (lorentzenchr)

Co-authored-by: David Dale <ddale@yandex-team.ru>
Co-authored-by: Christian Lorentzen <lorentzen.ch@gmail.com>
1 parent 88be3c1 commit c1cc67d

File tree: 7 files changed, +729 -0 lines changed

doc/modules/classes.rst

Lines changed: 1 addition & 0 deletions
@@ -839,6 +839,7 @@ Any estimator using the Huber loss would also be robust to outliers, e.g.
    :template: class.rst
 
    linear_model.HuberRegressor
+   linear_model.QuantileRegressor
    linear_model.RANSACRegressor
    linear_model.TheilSenRegressor

doc/modules/linear_model.rst

Lines changed: 77 additions & 0 deletions
@@ -1423,6 +1423,83 @@ Note that this estimator is different from the R implementation of Robust Regres
 squares implementation with weights given to each sample on the basis of how much the residual is
 greater than a certain threshold.
 
+.. _quantile_regression:
+
+Quantile Regression
+===================
+
+Quantile regression estimates the median or other quantiles of :math:`y`
+conditional on :math:`X`, while ordinary least squares (OLS) estimates the
+conditional mean.
+
+As a linear model, the :class:`QuantileRegressor` gives linear predictions
+:math:`\hat{y}(w, X) = Xw` for the :math:`q`-th quantile, :math:`q \in (0, 1)`.
+The weights or coefficients :math:`w` are then found by the following
+minimization problem:
+
+.. math::
+    \min_{w} {\frac{1}{n_{\text{samples}}}
+    \sum_i PB_q(y_i - X_i w) + \alpha ||w||_1}.
+
+This consists of the pinball loss (also known as linear loss),
+see also :func:`~sklearn.metrics.mean_pinball_loss`,
+
+.. math::
+    PB_q(t) = q \max(t, 0) + (1 - q) \max(-t, 0) =
+    \begin{cases}
+        q t, & t > 0, \\
+        0, & t = 0, \\
+        (q - 1) t, & t < 0
+    \end{cases}
+
+and the L1 penalty controlled by parameter ``alpha``, similar to
+:class:`Lasso`.
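A quick numeric check of the loss definition above (illustrative only, not part of this commit): the by-hand pinball loss should match :func:`~sklearn.metrics.mean_pinball_loss`, whose quantile-level parameter is also named ``alpha`` (unrelated to the L1 penalty ``alpha`` of the estimator):

    import numpy as np
    from sklearn.metrics import mean_pinball_loss

    q = 0.9
    y_true = np.array([1.0, 2.0, 3.0])
    y_pred = np.array([1.5, 2.0, 2.0])

    # PB_q(t) = q * max(t, 0) + (1 - q) * max(-t, 0), with t = y_true - y_pred
    t = y_true - y_pred
    by_hand = np.mean(q * np.maximum(t, 0) + (1 - q) * np.maximum(-t, 0))

    print(by_hand)                                     # 0.31666...
    print(mean_pinball_loss(y_true, y_pred, alpha=q))  # identical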
+As the pinball loss is only linear in the residuals, quantile regression is
+much more robust to outliers than squared-error-based estimation of the mean.
+Somewhat in between is the :class:`HuberRegressor`.
+
+Quantile regression may be useful if one is interested in predicting an
+interval instead of a point prediction. Sometimes, prediction intervals are
+calculated based on the assumption that the prediction error is distributed
+normally with zero mean and constant variance. Quantile regression provides
+sensible prediction intervals even for errors with non-constant (but
+predictable) variance or a non-normal distribution.
+
+.. figure:: /auto_examples/linear_model/images/sphx_glr_plot_quantile_regression_001.png
+   :target: ../auto_examples/linear_model/plot_quantile_regression.html
+   :align: center
+   :scale: 50%
+
+Based on minimizing the pinball loss, conditional quantiles can also be
+estimated by models other than linear models. For example,
+:class:`~sklearn.ensemble.GradientBoostingRegressor` can predict conditional
+quantiles if its parameter ``loss`` is set to ``"quantile"`` and parameter
+``alpha`` is set to the quantile that should be predicted. See the example in
+:ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_quantile.py`.
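As a minimal sketch of that option (not part of this commit; the data here is made up for illustration):

    import numpy as np
    from sklearn.ensemble import GradientBoostingRegressor

    rng = np.random.RandomState(0)
    X = rng.uniform(0, 10, size=(200, 1))
    y = 10 + 0.5 * X.ravel() + rng.normal(scale=1 + X.ravel(), size=200)

    # One model per quantile level; here ``alpha`` is the target quantile,
    # not a regularization strength.
    gbr_low = GradientBoostingRegressor(loss="quantile", alpha=0.05).fit(X, y)
    gbr_high = GradientBoostingRegressor(loss="quantile", alpha=0.95).fit(X, y)

    # Empirical coverage of the resulting 90% prediction interval.
    inside = (y >= gbr_low.predict(X)) & (y <= gbr_high.predict(X))
    print(inside.mean())  # close to 0.9 on the training data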
+Most implementations of quantile regression solve a linear programming
+problem. The current implementation is based on
+:func:`scipy.optimize.linprog`.
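To make the linear programming connection concrete, here is a sketch of the unpenalized problem (an illustration under stated assumptions, not the estimator's actual implementation; it assumes SciPy's "highs" solver is available). Splitting coefficients and residuals into non-negative parts turns the pinball loss into a linear objective:

    import numpy as np
    from scipy.optimize import linprog

    def quantile_lp(X, y, q):
        """Fit the q-th quantile via an LP (alpha=0 case, no intercept).

        Variables: [w_plus, w_minus, u_plus, u_minus]. linprog's default
        bounds (0, None) make them non-negative, so w = w_plus - w_minus
        and the residual y - X @ w = u_plus - u_minus.
        """
        n, p = X.shape
        # Objective: mean pinball loss = (q * u_plus + (1 - q) * u_minus) / n
        c = np.concatenate(
            [np.zeros(2 * p), q * np.ones(n), (1 - q) * np.ones(n)]
        ) / n
        # Equality constraint: X @ w_plus - X @ w_minus + u_plus - u_minus = y
        A_eq = np.hstack([X, -X, np.eye(n), -np.eye(n)])
        res = linprog(c, A_eq=A_eq, b_eq=y, method="highs")
        return res.x[:p] - res.x[p:2 * p]

    # For an intercept term, prepend a column of ones to X.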
+.. topic:: Examples:
+
+  * :ref:`sphx_glr_auto_examples_linear_model_plot_quantile_regression.py`
+
+.. topic:: References:
+
+  * Koenker, R., & Bassett Jr, G. (1978). `Regression quantiles.
+    <https://gib.people.uic.edu/RQ.pdf>`_
+    Econometrica: Journal of the Econometric Society, 46(1), 33-50.
+
+  * Portnoy, S., & Koenker, R. (1997). The Gaussian hare and the Laplacian
+    tortoise: computability of squared-error versus absolute-error estimators.
+    Statistical Science, 12, 279-300. https://doi.org/10.1214/ss/1030037960
+
+  * Koenker, R. (2005). Quantile Regression.
+    Cambridge University Press. https://doi.org/10.1017/CBO9780511754098
+
 .. _polynomial_regression:
 
 Polynomial regression: extending linear models with basis functions
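A usage sketch of the new estimator (the quantile and alpha values mirror the example file added below; the coverage check is illustrative only):

    import numpy as np
    from sklearn.linear_model import QuantileRegressor

    rng = np.random.RandomState(42)
    x = np.linspace(0, 10, 100)
    X = x[:, np.newaxis]
    # Heteroscedastic target, as in the example file in this commit.
    y = 10 + 0.5 * x + rng.normal(loc=0, scale=0.5 + 0.5 * x, size=x.shape[0])

    for q in [0.05, 0.5, 0.95]:
        qr = QuantileRegressor(quantile=q, alpha=0)  # alpha=0: no L1 penalty
        qr.fit(X, y)
        # The fraction of training targets at or below the fitted line
        # should be close to q.
        print(q, (y <= qr.predict(X)).mean())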

doc/whats_new/v1.0.rst

Lines changed: 5 additions & 0 deletions
@@ -282,6 +282,11 @@ Changelog
 :mod:`sklearn.linear_model`
 ...........................
 
+- |Feature| Added :class:`linear_model.QuantileRegressor` which implements
+  linear quantile regression with L1 penalty.
+  :pr:`9978` by :user:`David Dale <avidale>` and
+  :user:`Christian Lorentzen <lorentzenchr>`.
+
 - |Feature| The new :class:`linear_model.SGDOneClassSVM` provides an SGD
   implementation of the linear One-Class SVM. Combined with kernel
   approximation techniques, this implementation approximates the solution of
examples/linear_model/plot_quantile_regression.py (new file)

Lines changed: 110 additions & 0 deletions

@@ -0,0 +1,110 @@
"""
===================
Quantile regression
===================

This example illustrates how quantile regression can predict non-trivial
conditional quantiles.

The left figure shows the case when the error distribution is normal,
but has non-constant variance, i.e. with heteroscedasticity.

The right figure shows an example of an asymmetric error distribution,
namely the Pareto distribution.
"""
print(__doc__)

# Authors: David Dale <dale.david@mail.ru>
#          Christian Lorentzen <lorentzen.ch@gmail.com>
# License: BSD 3 clause
import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import QuantileRegressor, LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import cross_val_score


def plot_points_highlighted(x, y, model_low, model_high, ax):
    """Plot points; cross-mark those outside the low/high quantile models.

    Predictions use the module-level design matrix ``X``.
    """
    mask = y <= model_low.predict(X)
    ax.scatter(x[mask], y[mask], c="k", marker="x")
    mask = y > model_high.predict(X)
    ax.scatter(x[mask], y[mask], c="k", marker="x")
    mask = (y > model_low.predict(X)) & (y <= model_high.predict(X))
    ax.scatter(x[mask], y[mask], c="k")


fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5), sharey=True)

rng = np.random.RandomState(42)
x = np.linspace(0, 10, 100)
X = x[:, np.newaxis]
# Heteroscedastic Normal noise: the scale grows with x.
y = 10 + 0.5 * x + rng.normal(loc=0, scale=0.5 + 0.5 * x, size=x.shape[0])
y_mean = 10 + 0.5 * x
ax1.plot(x, y_mean, "k--")

quantiles = [0.05, 0.5, 0.95]
models = []
for quantile in quantiles:
    qr = QuantileRegressor(quantile=quantile, alpha=0)
    qr.fit(X, y)
    ax1.plot(x, qr.predict(X))
    models.append(qr)

plot_points_highlighted(x, y, models[0], models[2], ax1)
ax1.set_xlabel("x")
ax1.set_ylabel("y")
ax1.set_title("Quantiles of heteroscedastic Normal distributed target")
ax1.legend(["true mean"] + quantiles)


# Asymmetric Pareto noise, shifted to have zero mean.
a = 5
y = 10 + 0.5 * x + 10 * (rng.pareto(a, size=x.shape[0]) - 1 / (a - 1))
ax2.plot(x, y_mean, "k--")

models = []
for quantile in quantiles:
    qr = QuantileRegressor(quantile=quantile, alpha=0)
    qr.fit(X, y)
    ax2.plot([0, 10], qr.predict([[0], [10]]))
    models.append(qr)

plot_points_highlighted(x, y, models[0], models[2], ax2)
ax2.set_xlabel("x")
ax2.set_ylabel("y")
ax2.set_title("Quantiles of asymmetric Pareto distributed target")
ax2.legend(["true mean"] + quantiles, loc="lower right")
ax2.yaxis.set_tick_params(labelbottom=True)

plt.show()

# %%
# Note that both targets have the same mean value, indicated by the dashed
# black line. As the Normal distribution is symmetric, mean and median are
# identical and the predicted 0.5 quantile almost hits the true mean.
# In the Pareto case, the difference between the predicted median and the
# true mean is evident. We also marked the points below the 0.05 and above
# the 0.95 predicted quantiles by small crosses. You might count them and
# consider that we have 100 samples in total.
#
# The second part of the example shows that LinearRegression minimizes MSE
# in order to predict the mean, while QuantileRegressor with `quantile=0.5`
# minimizes MAE in order to predict the median. Both do their own job well.

models = [LinearRegression(), QuantileRegressor(alpha=0)]
names = ["OLS", "Quantile"]

print("# In-sample performance")
for model_name, model in zip(names, models):
    print(model_name + ":")
    model.fit(X, y)
    mae = mean_absolute_error(model.predict(X), y)
    rmse = np.sqrt(mean_squared_error(model.predict(X), y))
    print(f"MAE = {mae:.4}  RMSE = {rmse:.4}")

print("\n# Cross-validated performance")
for model_name, model in zip(names, models):
    print(model_name + ":")
    mae = -cross_val_score(model, X, y, cv=3,
                           scoring="neg_mean_absolute_error").mean()
    rmse = np.sqrt(-cross_val_score(model, X, y, cv=3,
                                    scoring="neg_mean_squared_error").mean())
    print(f"MAE = {mae:.4}  RMSE = {rmse:.4}")

sklearn/linear_model/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -28,6 +28,7 @@
 from ._passive_aggressive import PassiveAggressiveRegressor
 from ._perceptron import Perceptron
 
+from ._quantile import QuantileRegressor
 from ._ransac import RANSACRegressor
 from ._theil_sen import TheilSenRegressor
 
@@ -59,6 +60,7 @@
     'PassiveAggressiveClassifier',
     'PassiveAggressiveRegressor',
     'Perceptron',
+    'QuantileRegressor',
     'Ridge',
     'RidgeCV',
     'RidgeClassifier',

0 commit comments