@@ -413,43 +413,48 @@ def _get_column_indices(X, key):
413
413
"strings, or boolean mask is allowed" )
414
414
415
415
416
- def resample (* arrays , ** options ):
417
- """Resample arrays or sparse matrices in a consistent way
416
+ def resample (* arrays ,
417
+ replace = True ,
418
+ n_samples = None ,
419
+ random_state = None ,
420
+ stratify = None ):
421
+ """Resample arrays or sparse matrices in a consistent way.
418
422
419
423
The default strategy implements one step of the bootstrapping
420
424
procedure.
421
425
422
426
Parameters
423
427
----------
424
- *arrays : sequence of indexable data-structures
428
+ *arrays : sequence of array-like of shape (n_samples,) or \
429
+ (n_samples, n_outputs)
425
430
Indexable data-structures can be arrays, lists, dataframes or scipy
426
431
sparse matrices with consistent first dimension.
427
432
428
- Other Parameters
429
- ----------------
430
- replace : boolean, True by default
433
+ replace : bool, default=True
431
434
Implements resampling with replacement. If False, this will implement
432
435
(sliced) random permutations.
433
436
434
- n_samples : int, None by default
437
+ n_samples : int, default=None
435
438
Number of samples to generate. If left to None this is
436
439
automatically set to the first dimension of the arrays.
437
440
If replace is False it should not be larger than the length of
438
441
arrays.
439
442
440
- random_state : int, RandomState instance or None, optional ( default=None)
443
+ random_state : int or RandomState instance, default=None
441
444
Determines random number generation for shuffling
442
445
the data.
443
446
Pass an int for reproducible results across multiple function calls.
444
447
See :term:`Glossary <random_state>`.
445
448
446
- stratify : array-like or None (default=None)
449
+ stratify : array-like of shape (n_samples,) or (n_samples, n_outputs), \
450
+ default=None
447
451
If not None, data is split in a stratified fashion, using this as
448
452
the class labels.
449
453
450
454
Returns
451
455
-------
452
- resampled_arrays : sequence of indexable data-structures
456
+ resampled_arrays : sequence of array-like of shape (n_samples,) or \
457
+ (n_samples, n_outputs)
453
458
Sequence of resampled copies of the collections. The original arrays
454
459
are not impacted.
455
460
@@ -492,18 +497,12 @@ def resample(*arrays, **options):
492
497
... random_state=0)
493
498
[1, 1, 1, 0, 1]
494
499
495
-
496
500
See also
497
501
--------
498
502
:func:`sklearn.utils.shuffle`
499
503
"""
500
-
501
- random_state = check_random_state (options .pop ('random_state' , None ))
502
- replace = options .pop ('replace' , True )
503
- max_n_samples = options .pop ('n_samples' , None )
504
- stratify = options .pop ('stratify' , None )
505
- if options :
506
- raise ValueError ("Unexpected kw arguments: %r" % options .keys ())
504
+ max_n_samples = n_samples
505
+ random_state = check_random_state (random_state )
507
506
508
507
if len (arrays ) == 0 :
509
508
return None
@@ -556,7 +555,6 @@ def resample(*arrays, **options):
556
555
557
556
indices = random_state .permutation (indices )
558
557
559
-
560
558
# convert sparse matrices to CSR for row-based indexing
561
559
arrays = [a .tocsr () if issparse (a ) else a for a in arrays ]
562
560
resampled_arrays = [_safe_indexing (a , indices ) for a in arrays ]
0 commit comments