Adding out_of_bounds parameter to handle values outside training domain · scikit-learn/scikit-learn@2629fc7 · GitHub
[go: up one dir, main page]

Skip to content

Commit 2629fc7

Browse files
committed
Adding out_of_bounds parameter to handle values outside training domain
1 parent 0e0da77 commit 2629fc7

File tree

1 file changed

+41
-4
lines changed

1 file changed

+41
-4
lines changed

sklearn/isotonic.py

Lines changed: 41 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,13 @@ class IsotonicRegression(BaseEstimator, TransformerMixin, RegressorMixin):
183183
increase or decrease based on the Spearman correlation estimate's
184184
sign.
185185
186+
out_of_bounds : string, optional, default: "nan"
187+
The ``out_of_bounds`` parameter controls how x-values outside of the
188+
training domain are handled. When set to "nan", predicted y-values
189+
will be NaN. When set to "clip", predicted y-values will be
190+
set to the value corresponding to the nearest train interval endpoint.
191+
When set to "raise", ``interp1d`` is allowed to raise ValueError.
192+
186193
187194
Attributes
188195
----------
@@ -192,17 +199,25 @@ class IsotonicRegression(BaseEstimator, TransformerMixin, RegressorMixin):
192199
`y_` : ndarray (n_samples, )
193200
Isotonic fit of y.
194201
202+
`X_min_` : float
203+
Minimum value of input array X_ for left bound.
204+
205+
`X_max_` : float
206+
Maximum value of input array X_ for right bound.
207+
195208
References
196209
----------
197210
Isotonic Median Regression: A Linear Programming Approach
198211
Nilotpal Chakravarti
199212
Mathematics of Operations Research
200213
Vol. 14, No. 2 (May, 1989), pp. 303-308
201214
"""
202-
def __init__(self, y_min=None, y_max=None, increasing=True):
215+
def __init__(self, y_min=None, y_max=None, increasing=True,
216+
out_of_bounds='nan'):
203217
self.y_min = y_min
204218
self.y_max = y_max
205219
self.increasing = increasing
220+
self.out_of_bounds = out_of_bounds
206221

207222
def _check_fit_data(self, X, y, sample_weight=None):
208223
if len(X.shape) != 1:
@@ -254,6 +269,11 @@ def fit(self, X, y, sample_weight=None, weight=None):
254269
self.X_ = as_float_array(X[order], copy=False)
255270
self.y_ = isotonic_regression(y[order], sample_weight, self.y_min,
256271
self.y_max, increasing=self.increasing_)
272+
273+
# Handle the left and right bounds on X
274+
self.X_min_ = np.min(self.X_)
275+
self.X_max_ = np.max(self.X_)
276+
257277
return self
258278

259279
def transform(self, T):
@@ -273,9 +293,21 @@ def transform(self, T):
273293
if len(T.shape) != 1:
274294
raise ValueError("X should be a vector")
275295

276-
f = interpolate.interp1d(self.X_, self.y_, kind='linear',
277-
bounds_error=True)
278-
return f(T)
296+
# Only raise exception on out-of-bounds data if requested.
297+
if self.out_of_bounds == "raise":
298+
f = interpolate.interp1d(self.X_, self.y_, kind='linear',
299+
bounds_error=True)
300+
else:
301+
f = interpolate.interp1d(self.X_, self.y_, kind='linear',
302+
bounds_error=False)
303+
304+
# Clip out-of-bounds values if requested.
305+
if self.out_of_bounds == "clip":
306+
T_final = np.clip(T, self.X_min_, self.X_max_)
307+
else:
308+
T_final = T
309+
310+
return f(T_final)
279311

280312
def fit_transform(self, X, y, sample_weight=None, weight=None):
281313
"""Fit model and transform y by linear interpolation.
@@ -325,6 +357,11 @@ def fit_transform(self, X, y, sample_weight=None, weight=None):
325357
self.X_ = as_float_array(X[order], copy=False)
326358
self.y_ = isotonic_regression(y[order], sample_weight, self.y_min,
327359
self.y_max, increasing=self.increasing_)
360+
361+
# Handle the left and right bounds on X
362+
self.X_min_ = np.min(self.X_)
363+
self.X_max_ = np.max(self.X_)
364+
328365
return self.y_[order_inv]
329366

330367
def predict(self, T):

0 commit comments

Comments
 (0)
0