MNT Replace kwargs by named args for resample (#17324) · viclafargue/scikit-learn@5d7401b · GitHub
[go: up one dir, main page]

Skip to content

Commit 5d7401b

Browse files
alfaro96 authored and
viclafargue committed
MNT Replace kwargs by named args for resample (scikit-learn#17324)
1 parent 538fa7f commit 5d7401b

File tree

2 files changed

+17
-21
lines changed

2 files changed

+17
-21
lines changed

sklearn/utils/__init__.py

Lines changed: 17 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -413,43 +413,48 @@ def _get_column_indices(X, key):
413413
"strings, or boolean mask is allowed")
414414

415415

416-
def resample(*arrays, **options):
417-
"""Resample arrays or sparse matrices in a consistent way
416+
def resample(*arrays,
417+
replace=True,
418+
n_samples=None,
419+
random_state=None,
420+
stratify=None):
421+
"""Resample arrays or sparse matrices in a consistent way.
418422
419423
The default strategy implements one step of the bootstrapping
420424
procedure.
421425
422426
Parameters
423427
----------
424-
*arrays : sequence of indexable data-structures
428+
*arrays : sequence of array-like of shape (n_samples,) or \
429+
(n_samples, n_outputs)
425430
Indexable data-structures can be arrays, lists, dataframes or scipy
426431
sparse matrices with consistent first dimension.
427432
428-
Other Parameters
429-
----------------
430-
replace : boolean, True by default
433+
replace : bool, default=True
431434
Implements resampling with replacement. If False, this will implement
432435
(sliced) random permutations.
433436
434-
n_samples : int, None by default
437+
n_samples : int, default=None
435438
Number of samples to generate. If left to None this is
436439
automatically set to the first dimension of the arrays.
437440
If replace is False it should not be larger than the length of
438441
arrays.
439442
440-
random_state : int, RandomState instance or None, optional (default=None)
443+
random_state : int or RandomState instance, default=None
441444
Determines random number generation for shuffling
442445
the data.
443446
Pass an int for reproducible results across multiple function calls.
444447
See :term:`Glossary <random_state>`.
445448
446-
stratify : array-like or None (default=None)
449+
stratify : array-like of shape (n_samples,) or (n_samples, n_outputs), \
450+
default=None
447451
If not None, data is split in a stratified fashion, using this as
448452
the class labels.
449453
450454
Returns
451455
-------
452-
resampled_arrays : sequence of indexable data-structures
456+
resampled_arrays : sequence of array-like of shape (n_samples,) or \
457+
(n_samples, n_outputs)
453458
Sequence of resampled copies of the collections. The original arrays
454459
are not impacted.
455460
@@ -492,18 +497,12 @@ def resample(*arrays, **options):
492497
... random_state=0)
493498
[1, 1, 1, 0, 1]
494499
495-
496500
See also
497501
--------
498502
:func:`sklearn.utils.shuffle`
499503
"""
500-
501-
random_state = check_random_state(options.pop('random_state', None))
502-
replace = options.pop('replace', True)
503-
max_n_samples = options.pop('n_samples', None)
504-
stratify = options.pop('stratify', None)
505-
if options:
506-
raise ValueError("Unexpected kw arguments: %r" % options.keys())
504+
max_n_samples = n_samples
505+
random_state = check_random_state(random_state)
507506

508507
if len(arrays) == 0:
509508
return None
@@ -556,7 +555,6 @@ def resample(*arrays, **options):
556555

557556
indices = random_state.permutation(indices)
558557

559-
560558
# convert sparse matrices to CSR for row-based indexing
561559
arrays = [a.tocsr() if issparse(a) else a for a in arrays]
562560
resampled_arrays = [_safe_indexing(a, indices) for a in arrays]

sklearn/utils/tests/test_utils.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,6 @@ def test_resample():
116116
with pytest.raises(ValueError):
117117
resample([0, 1], [0, 1], replace=False, n_samples=3)
118118

119-
with pytest.raises(ValueError):
120-
resample([0, 1], [0, 1], meaning_of_life=42)
121119
# Issue:6581, n_samples can be more when replace is True (default).
122120
assert len(resample([1, 2], n_samples=5)) == 5
123121

0 commit comments

Comments
 (0)
0