|
32 | 32 | from .metrics.scorer import check_scoring
|
33 | 33 | from .utils.fixes import bincount
|
34 | 34 |
|
35 |
| -__all__ = ['Bootstrap', |
36 |
| - 'KFold', |
| 35 | +__all__ = ['KFold', |
37 | 36 | 'LeaveOneLabelOut',
|
38 | 37 | 'LeaveOneOut',
|
39 | 38 | 'LeavePLabelOut',
|
@@ -593,142 +592,6 @@ def __len__(self):
|
593 | 592 | factorial(self.p))
|
594 | 593 |
|
595 | 594 |
|
596 |
| -class Bootstrap(object): |
597 |
| - """Random sampling with replacement cross-validation iterator |
598 |
| -
|
599 |
| - Provides train/test indices to split data in train test sets |
600 |
| - while resampling the input n_iter times: each time a new |
601 |
| - random split of the data is performed and then samples are drawn |
602 |
| - (with replacement) on each side of the split to build the training |
603 |
| - and test sets. |
604 |
| -
|
605 |
| - Note: contrary to other cross-validation strategies, bootstrapping |
606 |
| - will allow some samples to occur several times in each splits. However |
607 |
| - a sample that occurs in the train split will never occur in the test |
608 |
| - split and vice-versa. |
609 |
| -
|
610 |
| - If you want each sample to occur at most once you should probably |
611 |
| - use ShuffleSplit cross validation instead. |
612 |
| -
|
613 |
| - Parameters |
614 |
| - ---------- |
615 |
| - n : int |
616 |
| - Total number of elements in the dataset. |
617 |
| -
|
618 |
| - n_iter : int (default is 3) |
619 |
| - Number of bootstrapping iterations |
620 |
| -
|
621 |
| - train_size : int or float (default is 0.5) |
622 |
| - If int, number of samples to include in the training split |
623 |
| - (should be smaller than the total number of samples passed |
624 |
| - in the dataset). |
625 |
| -
|
626 |
| - If float, should be between 0.0 and 1.0 and represent the |
627 |
| - proportion of the dataset to include in the train split. |
628 |
| -
|
629 |
| - test_size : int or float or None (default is None) |
630 |
| - If int, number of samples to include in the training set |
631 |
| - (should be smaller than the total number of samples passed |
632 |
| - in the dataset). |
633 |
| -
|
634 |
| - If float, should be between 0.0 and 1.0 and represent the |
635 |
| - proportion of the dataset to include in the test split. |
636 |
| -
|
637 |
| - If None, n_test is set as the complement of n_train. |
638 |
| -
|
639 |
| - random_state : int or RandomState |
640 |
| - Pseudo number generator state used for random sampling. |
641 |
| -
|
642 |
| - Examples |
643 |
| - -------- |
644 |
| - >>> from sklearn import cross_validation |
645 |
| - >>> bs = cross_validation.Bootstrap(9, random_state=0) |
646 |
| - >>> len(bs) |
647 |
| - 3 |
648 |
| - >>> print(bs) |
649 |
| - Bootstrap(9, n_iter=3, train_size=5, test_size=4, random_state=0) |
650 |
| - >>> for train_index, test_index in bs: |
651 |
| - ... print("TRAIN:", train_index, "TEST:", test_index) |
652 |
| - ... |
653 |
| - TRAIN: [1 8 7 7 8] TEST: [0 3 0 5] |
654 |
| - TRAIN: [5 4 2 4 2] TEST: [6 7 1 0] |
655 |
| - TRAIN: [4 7 0 1 1] TEST: [5 3 6 5] |
656 |
| -
|
657 |
| - See also |
658 |
| - -------- |
659 |
| - ShuffleSplit: cross validation using random permutations. |
660 |
| - """ |
661 |
| - |
662 |
| - # Static marker to be able to introspect the CV type |
663 |
| - indices = True |
664 |
| - |
665 |
| - def __init__(self, n, n_iter=3, train_size=.5, test_size=None, |
666 |
| - random_state=None): |
667 |
| - # See, e.g., http://youtu.be/BzHz0J9a6k0?t=9m38s for a motivation |
668 |
| - # behind this deprecation |
669 |
| - warnings.warn("Bootstrap will no longer be supported as a " + |
670 |
| - "cross-validation method as of version 0.15 and " + |
671 |
| - "will be removed in 0.17", DeprecationWarning) |
672 |
| - self.n = n |
673 |
| - self.n_iter = n_iter |
674 |
| - if isinstance(train_size, numbers.Integral): |
675 |
| - self.train_size = train_size |
676 |
| - elif (isinstance(train_size, numbers.Real) and train_size >= 0.0 |
677 |
| - and train_size <= 1.0): |
678 |
| - self.train_size = int(ceil(train_size * n)) |
679 |
| - else: |
680 |
| - raise ValueError("Invalid value for train_size: %r" % |
681 |
| - train_size) |
682 |
| - if self.train_size > n: |
683 |
| - raise ValueError("train_size=%d should not be larger than n=%d" % |
684 |
| - (self.train_size, n)) |
685 |
| - |
686 |
| - if isinstance(test_size, numbers.Integral): |
687 |
| - self.test_size = test_size |
688 |
| - elif isinstance(test_size, numbers.Real) and 0.0 <= test_size <= 1.0: |
689 |
| - self.test_size = int(ceil(test_size * n)) |
690 |
| - elif test_size is None: |
691 |
| - self.test_size = self.n - self.train_size |
692 |
| - else: |
693 |
| - raise ValueError("Invalid value for test_size: %r" % test_size) |
694 |
| - if self.test_size > n - self.train_size: |
695 |
| - raise ValueError(("test_size + train_size=%d, should not be " + |
696 |
| - "larger than n=%d") % |
697 |
| - (self.test_size + self.train_size, n)) |
698 |
| - |
699 |
| - self.random_state = random_state |
700 |
| - |
701 |
| - def __iter__(self): |
702 |
| - rng = check_random_state(self.random_state) |
703 |
| - for i in range(self.n_iter): |
704 |
| - # random partition |
705 |
| - permutation = rng.permutation(self.n) |
706 |
| - ind_train = permutation[:self.train_size] |
707 |
| - ind_test = permutation[self.train_size:self.train_size |
708 |
| - + self.test_size] |
709 |
| - |
710 |
| - # bootstrap in each split individually |
711 |
| - train = rng.randint(0, self.train_size, |
712 |
| - size=(self.train_size,)) |
713 |
| - test = rng.randint(0, self.test_size, |
714 |
| - size=(self.test_size,)) |
715 |
| - yield ind_train[train], ind_test[test] |
716 |
| - |
717 |
| - def __repr__(self): |
718 |
| - return ('%s(%d, n_iter=%d, train_size=%d, test_size=%d, ' |
719 |
| - 'random_state=%s)' % ( |
720 |
| - self.__class__.__name__, |
721 |
| - self.n, |
722 |
| - self.n_iter, |
723 |
| - self.train_size, |
724 |
| - self.test_size, |
725 |
| - self.random_state, |
726 |
| - )) |
727 |
| - |
728 |
| - def __len__(self): |
729 |
| - return self.n_iter |
730 |
| - |
731 |
| - |
732 | 595 | class BaseShuffleSplit(with_metaclass(ABCMeta)):
|
733 | 596 | """Base class for ShuffleSplit and StratifiedShuffleSplit"""
|
734 | 597 |
|
@@ -815,9 +678,6 @@ class ShuffleSplit(BaseShuffleSplit):
|
815 | 678 | TRAIN: [2 1] TEST: [0]
|
816 | 679 | TRAIN: [0 2] TEST: [3]
|
817 | 680 |
|
818 |
| - See also |
819 |
| - -------- |
820 |
| - Bootstrap: cross-validation using re-sampling with replacement. |
821 | 681 | """
|
822 | 682 |
|
823 | 683 | def _iter_indices(self):
|
|
0 commit comments