FEA Add writeable parameter to check_array (#29018) · scikit-learn/scikit-learn@9c9f106 · GitHub
[go: up one dir, main page]

Skip to content
8000

Commit 9c9f106

Browse files
jeremiedbb and ogrisel committed
FEA Add writeable parameter to check_array (#29018)
Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org>
1 parent ded9890 commit 9c9f106

File tree

19 files changed

+294
-37
lines changed
  • 19 files changed

    +294
    -37
    lines changed

    doc/whats_new/v1.5.rst

    Lines changed: 19 additions & 3 deletions
    Original file line numberDiff line numberDiff line change
    @@ -20,15 +20,19 @@ Version 1.5.1
    2020

    2121
    **TODO**
    2222

    23-
    Changelog
    24-
    ---------
    25-
    2623
    Changes impacting many modules
    2724
    ------------------------------
    2825

    26+
    - |Fix| Fixed a regression in the validation of the input data of all estimators where
    27+
    an unexpected error was raised when passing a DataFrame backed by a read-only buffer.
    28+
    :pr:`29018` by :user:`Jérémie du Boisberranger <jeremiedbb>`.
    29+
    2930
    - |Fix| Fixed a regression causing a dead-lock at import time in some settings.
    3031
    :pr:`29235` by :user:`Jérémie du Boisberranger <jeremiedbb>`.
    3132

    33+
    Changelog
    34+
    ---------
    35+
    3236
    :mod:`sklearn.metrics`
    3337
    ......................
    3438

    @@ -37,6 +41,10 @@ Changes impacting many modules
    3741
    instead of implicitly converting those inputs as regular NumPy arrays.
    3842
    :pr:`29119` by :user:`Olivier Grisel`.
    3943

    44+
    - |Fix| Fix a regression in :func:`metrics.zero_one_loss` causing an error
    45+
    for Array API dispatch with multilabel inputs.
    46+
    :pr:`29269` by :user:`Yaroslav Korobko <Tialo>`.
    47+
    4048
    :mod:`sklearn.model_selection`
    4149
    ..............................
    4250

    @@ -48,6 +56,14 @@ Changes impacting many modules
    4856
    grids that have estimators as parameter values.
    4957
    :pr:`29179` by :user:`Marco Gorelli<MarcoGorelli>`.
    5058

    59+
    :mod:`sklearn.utils`
    60+
    ....................
    61+
    62+
    - |API| :func:`utils.validation.check_array` has a new parameter, `force_writeable`, to
    63+
    control the writeability of the output array. If set to `True`, the output array will
    64+
    be guaranteed to be writeable and a copy will be made if the input array is read-only.
    65+
    If set to `False`, no guarantee is made about the writeability of the output array.
    66+
    :pr:`29018` by :user:`Jérémie du Boisberranger <jeremiedbb>`.
    5167

    5268
    .. _changes_1_5:
    5369

    sklearn/cluster/_affinity_propagation.py

    Lines changed: 3 additions & 6 deletions
    Original file line numberDiff line numberDiff line change
    @@ -504,13 +504,10 @@ def fit(self, X, y=None):
    504504
    Returns the instance itself.
    505505
    """
    506506
    if self.affinity == "precomputed":
    507-
    accept_sparse = False
    508-
    else:
    509-
    accept_sparse = "csr"
    510-
    X = self._validate_data(X, accept_sparse=accept_sparse)
    511-
    if self.affinity == "precomputed":
    512-
    self.affinity_matrix_ = X.copy() if self.copy else X
    507+
    X = self._validate_data(X, copy=self.copy, force_writeable=True)
    508+
    self.affinity_matrix_ = X
    513509
    else: # self.affinity == "euclidean"
    510+
    X = self._validate_data(X, accept_sparse="csr")
    514511
    self.affinity_matrix_ = -euclidean_distances(X, squared=True)
    515512

    516513
    if self.affinity_matrix_.shape[0] != self.affinity_matrix_.shape[1]:

    sklearn/cluster/_hdbscan/hdbscan.py

    Lines changed: 4 additions & 1 deletion
    Original file line numberDiff line numberDiff line change
    @@ -770,14 +770,17 @@ def fit(self, X, y=None):
    770770
    X,
    771771
    accept_sparse=["csr", "lil"],
    772772
    dtype=np.float64,
    773+
    force_writeable=True,
    773774
    )
    774775
    else:
    775776
    # Only non-sparse, precomputed distance matrices are handled here
    776777
    # and thereby allowed to contain numpy.inf for missing distances
    777778

    778779
    # Perform data validation after removing infinite values (numpy.inf)
    779780
    # from the given distance matrix.
    780-
    X = self._validate_data(X, force_all_finite=False, dtype=np.float64)
    781+
    X = self._validate_data(
    782+
    X, force_all_finite=False, dtype=np.float64, force_writeable=True
    783+
    )
    781784
    if np.isnan(X).any():
    782785
    # TODO: Support np.nan in Cython implementation for precomputed
    783786
    # dense HDBSCAN

    sklearn/cross_decomposition/_pls.py

    Lines changed: 22 additions & 4 deletions
    Original file line numberDiff line numberDiff line change
    @@ -263,10 +263,19 @@ def fit(self, X, y=None, Y=None):
    263263

    264264
    check_consistent_length(X, y)
    265265
    X = self._validate_data(
    266-
    X, dtype=np.float64, copy=self.copy, ensure_min_samples=2
    266+
    X,
    267+
    dtype=np.float64,
    268+
    force_writeable=True,
    269+
    copy=self.copy,
    270+
    ensure_min_samples=2,
    267271
    )
    268272
    y = check_array(
    269-
    y, input_name="y", dtype=np.float64, copy=self.copy, ensure_2d=False
    273+
    y,
    274+
    input_name="y",
    275+
    dtype=np.float64,
    276+
    force_writeable=True,
    277+
    copy=self.copy,
    278+
    ensure_2d=False,
    270279
    )
    271280
    if y.ndim == 1:
    272281
    self._predict_1d = True
    @@ -1056,10 +1065,19 @@ def fit(self, X, y=None, Y=None):
    10561065
    y = _deprecate_Y_when_required(y, Y)
    10571066
    check_consistent_length(X, y)
    10581067
    X = self._validate_data(
    1059-
    X, dtype=np.float64, copy=self.copy, ensure_min_samples=2
    1068+
    X,
    1069+
    dtype=np.float64,
    1070+
    force_writeable=True,
    1071+
    copy=self.copy,
    1072+
    ensure_min_samples=2,
    10601073
    )
    10611074
    y = check_array(
    1062-
    y, input_name="y", dtype=np.float64, copy=self.copy, ensure_2d=False
    1075+
    y,
    1076+
    input_name="y",
    1077+
    dtype=np.float64,
    1078+
    force_writeable=True,
    1079+
    copy=self.copy,
    1080+
    ensure_2d=False,
    10631081
    )
    10641082
    if y.ndim == 1:
    10651083
    y = y.reshape(-1, 1)

    sklearn/decomposition/_factor_analysis.py

    Lines changed: 3 additions & 1 deletion
    Original file line numberDiff line numberDiff line change
    @@ -219,7 +219,9 @@ def fit(self, X, y=None):
    219219
    self : object
    220220
    FactorAnalysis class instance.
    221221
    """
    222-
    X = self._validate_data(X, copy=self.copy, dtype=np.float64)
    222+
    X = self._validate_data(
    223+
    X, copy=self.copy, dtype=np.float64, force_writeable=True
    224+
    )
    223225

    224226
    n_samples, n_features = X.shape
    225227
    n_components = self.n_components

    sklearn/decomposition/_incremental_pca.py

    Lines changed: 6 additions & 1 deletion
    Original file line numberDiff line numberDiff line change
    @@ -229,6 +229,7 @@ def fit(self, X, y=None):
    229229
    accept_sparse=["csr", "csc", "lil"],
    230230
    copy=self.copy,
    231231
    dtype=[np.float64, np.float32],
    232+
    force_writeable=True,
    232233
    )
    233234
    n_samples, n_features = X.shape
    234235

    @@ -278,7 +279,11 @@ def partial_fit(self, X, y=None, check_input=True):
    278279
    "or use IncrementalPCA.fit to do so in batches."
    279280
    )
    280281
    X = self._validate_data(
    281-
    X, copy=self.copy, dtype=[np.float64, np.float32], reset=first_pass
    282+
    X,
    283+
    copy=self.copy,
    284+
    dtype=[np.float64, np.float32],
    285+
    force_writeable=True,
    286+
    reset=first_pass,
    282287
    )
    283288
    n_samples, n_features = X.shape
    284289
    if first_pass:

    sklearn/decomposition/_pca.py

    Lines changed: 1 addition & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -511,6 +511,7 @@ def _fit(self, X):
    511511
    X = self._validate_data(
    512512
    X,
    513513
    dtype=[xp.float64, xp.float32],
    514+
    force_writeable=True,
    514515
    accept_sparse=("csr", "csc"),
    515516
    ensure_2d=True,
    516517
    copy=False,

    sklearn/impute/_base.py

    Lines changed: 1 addition & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -334,6 +334,7 @@ def _validate_input(self, X, in_fit):
    334334
    reset=in_fit,
    335335
    accept_sparse="csc",
    336336
    dtype=dtype,
    337+
    force_writeable=True if not in_fit else None,
    337338
    force_all_finite=force_all_finite,
    338339
    copy=self.copy,
    339340
    )

    sklearn/impute/_knn.py

    Lines changed: 1 addition & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -267,6 +267,7 @@ def transform(self, X):
    267267
    X,
    268268
    accept_sparse=False,
    269269
    dtype=FLOAT_DTYPES,
    270+
    force_writeable=True,
    270271
    force_all_finite=force_all_finite,
    271272
    copy=self.copy,
    272273
    reset=False,

    sklearn/linear_model/_base.py

    Lines changed: 6 additions & 1 deletion
    Original file line numberDiff line numberDiff line change
    @@ -607,7 +607,12 @@ def fit(self, X, y, sample_weight=None):
    607607
    accept_sparse = False if self.positive else ["csr", "csc", "coo"]
    608608

    609609
    X, y = self._validate_data(
    610-
    X, y, accept_sparse=accept_sparse, y_numeric=True, multi_output=True
    610+
    X,
    611+
    y,
    612+
    accept_sparse=accept_sparse,
    613+
    y_numeric=True,
    614+
    multi_output=True,
    615+
    force_writeable=True,
    611616
    )
    612617

    613618
    has_sw = sample_weight is not None

    sklearn/linear_model/_bayes.py

    Lines changed: 9 additions & 2 deletions
    Original file line numberDiff line numberDiff line change
    @@ -235,7 +235,9 @@ def fit(self, X, y, sample_weight=None):
    235235
    self : object
    236236
    Returns the instance itself.
    237237
    """
    238-
    X, y = self._validate_data(X, y, dtype=[np.float64, np.float32], y_numeric=True)
    238+
    X, y = self._validate_data(
    239+
    X, y, dtype=[np.float64, np.float32], force_writeable=True, y_numeric=True
    240+
    )
    239241
    dtype = X.dtype
    240242

    241243
    if sample_weight is not None:
    @@ -620,7 +622,12 @@ def fit(self, X, y):
    620622
    Fitted estimator.
    621623
    """
    622624
    X, y = self._validate_data(
    623-
    X, y, dtype=[np.float64, np.float32], y_numeric=True, ensure_min_samples=2
    625+
    X,
    626+
    y,
    627+
    dtype=[np.float64, np.float32],
    628+
    force_writeable=True,
    629+
    y_numeric=True,
    630+
    ensure_min_samples=2,
    624631
    )
    625632
    dtype = X.dtype
    626633

    sklearn/linear_model/_coordinate_descent.py

    Lines changed: 4 additions & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -983,6 +983,7 @@ def fit(self, X, y, sample_weight=None, check_input=True):
    983983
    accept_sparse="csc",
    984984
    order="F",
    985985
    dtype=[np.float64, np.float32],
    986+
    force_writeable=True,
    986987
    accept_large_sparse=False,
    987988
    copy=X_copied,
    988989
    multi_output=True,
    @@ -1611,6 +1612,7 @@ def fit(self, X, y, sample_weight=None, **params):
    16111612
    check_X_params = dict(
    16121613
    accept_sparse="csc",
    16131614
    dtype=[np.float64, np.float32],
    1615+
    force_writeable=True,
    16141616
    copy=False,
    16151617
    accept_large_sparse=False,
    16161618
    )
    @@ -1636,6 +1638,7 @@ def fit(self, X, y, sample_weight=None, **params):
    16361638
    accept_sparse="csc",
    16371639
    dtype=[np.float64, np.float32],
    16381640
    order="F",
    1641+
    force_writeable=True,
    16391642
    copy=copy_X,
    16401643
    )
    16411644
    X, y = self._validate_data(
    @@ -2512,6 +2515,7 @@ def fit(self, X, y):
    25122515
    check_X_params = dict(
    25132516
    dtype=[np.float64, np.float32],
    25142517
    order="F",
    2518+
    force_writeable=True,
    25152519
    copy=self.copy_X and self.fit_intercept,
    25162520
    )
    25172521
    check_y_params = dict(ensure_2d=False, order="F")

    sklearn/linear_model/_least_angle.py

    Lines changed: 5 additions & 3 deletions
    Original file line numberDiff line numberDiff line change
    @@ -1180,7 +1180,9 @@ def fit(self, X, y, Xy=None):
    11801180
    self : object
    11811181
    Returns an instance of self.
    11821182
    """
    1183-
    X, y = self._validate_data(X, y, y_numeric=True, multi_output=True)
    1183+
    X, y = self._validate_data(
    1184+
    X, y, force_writeable=True, y_numeric=True, multi_output=True
    1185+
    )
    11841186

    11851187
    alpha = getattr(self, "alpha", 0.0)
    11861188
    if hasattr(self, "n_nonzero_coefs"):
    @@ -1721,7 +1723,7 @@ def fit(self, X, y, **params):
    17211723
    """
    17221724
    _raise_for_params(params, self, "fit")
    17231725

    1724-
    X, y = self._validate_data(X, y, y_numeric=True)
    1726+
    X, y = self._validate_data(X, y, force_writeable=True, y_numeric=True)
    17251727
    X = as_float_array(X, copy=self.copy_X)
    17261728
    y = as_float_array(y, copy=self.copy_X)
    17271729

    @@ -2238,7 +2240,7 @@ def fit(self, X, y, copy_X=None):
    22382240
    """
    22392241
    if copy_X is None:
    22402242
    copy_X = self.copy_X
    2241-
    X, y = self._validate_data(X, y, y_numeric=True)
    2243+
    X, y = self._validate_data(X, y, force_writeable=True, y_numeric=True)
    22422244

    22432245
    X, y, Xmean, ymean, Xstd = _preprocess_data(
    22442246
    X, y, fit_intercept=self.fit_intercept, copy=copy_X

    sklearn/linear_model/_ridge.py

    Lines changed: 2 additions & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -1244,6 +1244,7 @@ def fit(self, X, y, sample_weight=None):
    12441244
    y,
    12451245
    accept_sparse=_accept_sparse,
    12461246
    dtype=[xp.float64, xp.float32],
    1247+
    force_writeable=True,
    12471248
    multi_output=True,
    12481249
    y_numeric=True,
    12491250
    )
    @@ -1293,6 +1294,7 @@ def _prepare_data(self, X, y, sample_weight, solver):
    12931294
    accept_sparse=accept_sparse,
    12941295
    multi_output=True,
    12951296
    y_numeric=False,
    1297+
    force_writeable=True,
    12961298
    )
    12971299

    12981300
    self._label_binarizer = LabelBinarizer(pos_label=1, neg_label=-1)

    0 commit comments

    Comments
     (0)
    0