remove cross_validation.Bootstrap · raghavrv/scikit-learn@e1af92b · GitHub
[go: up one dir, main page]

Skip to content

Commit e1af92b

Browse files
committed
remove cross_validation.Bootstrap
1 parent 6beacc3 commit e1af92b

File tree

3 files changed

+4
-168
lines changed

3 files changed

+4
-168
lines changed

sklearn/cross_validation.py

Lines changed: 1 addition & 141 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,7 @@
3232
from .metrics.scorer import check_scoring
3333
from .utils.fixes import bincount
3434

35-
__all__ = ['Bootstrap',
36-
'KFold',
35+
__all__ = ['KFold',
3736
'LeaveOneLabelOut',
3837
'LeaveOneOut',
3938
'LeavePLabelOut',
@@ -593,142 +592,6 @@ def __len__(self):
593592
factorial(self.p))
594593

595594

596-
class Bootstrap(object):
597-
"""Random sampling with replacement cross-validation iterator
598-
599-
Provides train/test indices to split data in train test sets
600-
while resampling the input n_iter times: each time a new
601-
random split of the data is performed and then samples are drawn
602-
(with replacement) on each side of the split to build the training
603-
and test sets.
604-
605-
Note: contrary to other cross-validation strategies, bootstrapping
606-
will allow some samples to occur several times in each splits. However
607-
a sample that occurs in the train split will never occur in the test
608-
split and vice-versa.
609-
610-
If you want each sample to occur at most once you should probably
611-
use ShuffleSplit cross validation instead.
612-
613-
Parameters
614-
----------
615-
n : int
616-
Total number of elements in the dataset.
617-
618-
n_iter : int (default is 3)
619-
Number of bootstrapping iterations
620-
621-
train_size : int or float (default is 0.5)
622-
If int, number of samples to include in the training split
623-
(should be smaller than the total number of samples passed
624-
in the dataset).
625-
626-
If float, should be between 0.0 and 1.0 and represent the
627-
proportion of the dataset to include in the train split.
628-
629-
test_size : int or float or None (default is None)
630-
If int, number of samples to include in the training set
631-
(should be smaller than the total number of samples passed
632-
in the dataset).
633-
634-
If float, should be between 0.0 and 1.0 and represent the
635-
proportion of the dataset to include in the test split.
636-
637-
If None, n_test is set as the complement of n_train.
638-
639-
random_state : int or RandomState
640-
Pseudo number generator state used for random sampling.
641-
642-
Examples
643-
--------
644-
>>> from sklearn import cross_validation
645-
>>> bs = cross_validation.Bootstrap(9, random_state=0)
646-
>>> len(bs)
647-
3
648-
>>> print(bs)
649-
Bootstrap(9, n_iter=3, train_size=5, test_size=4, random_state=0)
650-
>>> for train_index, test_index in bs:
651-
... print("TRAIN:", train_index, "TEST:", test_index)
652-
...
653-
TRAIN: [1 8 7 7 8] TEST: [0 3 0 5]
654-
TRAIN: [5 4 2 4 2] TEST: [6 7 1 0]
655-
TRAIN: [4 7 0 1 1] TEST: [5 3 6 5]
656-
657-
See also
658-
--------
659-
ShuffleSplit: cross validation using random permutations.
660-
"""
661-
662-
# Static marker to be able to introspect the CV type
663-
indices = True
664-
665-
def __init__(self, n, n_iter=3, train_size=.5, test_size=None,
666-
random_state=None):
667-
# See, e.g., http://youtu.be/BzHz0J9a6k0?t=9m38s for a motivation
668-
# behind this deprecation
669-
warnings.warn("Bootstrap will no longer be supported as a " +
670-
"cross-validation method as of version 0.15 and " +
671-
"will be removed in 0.17", DeprecationWarning)
672-
self.n = n
673-
self.n_iter = n_iter
674-
if isinstance(train_size, numbers.Integral):
675-
self.train_size = train_size
676-
elif (isinstance(train_size, numbers.Real) and train_size >= 0.0
677-
and train_size <= 1.0):
678-
self.train_size = int(ceil(train_size * n))
679-
else:
680-
raise ValueError("Invalid value for train_size: %r" %
681-
train_size)
682-
if self.train_size > n:
683-
raise ValueError("train_size=%d should not be larger than n=%d" %
684-
(self.train_size, n))
685-
686-
if isinstance(test_size, numbers.Integral):
687-
self.test_size = test_size
688-
elif isinstance(test_size, numbers.Real) and 0.0 <= test_size <= 1.0:
689-
self.test_size = int(ceil(test_size * n))
690-
elif test_size is None:
691-
self.test_size = self.n - self.train_size
692-
else:
693-
raise ValueError("Invalid value for test_size: %r" % test_size)
694-
if self.test_size > n - self.train_size:
695-
raise ValueError(("test_size + train_size=%d, should not be " +
696-
"larger than n=%d") %
697-
(self.test_size + self.train_size, n))
698-
699-
self.random_state = random_state
700-
701-
def __iter__(self):
702-
rng = check_random_state(self.random_state)
703-
for i in range(self.n_iter):
704-
# random partition
705-
permutation = rng.permutation(self.n)
706-
ind_train = permutation[:self.train_size]
707-
ind_test = permutation[self.train_size:self.train_size
708-
+ self.test_size]
709-
710-
# bootstrap in each split individually
711-
train = rng.randint(0, self.train_size,
712-
size=(self.train_size,))
713-
test = rng.randint(0, self.test_size,
714-
size=(self.test_size,))
715-
yield ind_train[train], ind_test[test]
716-
717-
def __repr__(self):
718-
return ('%s(%d, n_iter=%d, train_size=%d, test_size=%d, '
719-
'random_state=%s)' % (
720-
self.__class__.__name__,
721-
self.n,
722-
self.n_iter,
723-
self.train_size,
724-
self.test_size,
725-
self.random_state,
726-
))
727-
728-
def __len__(self):
729-
return self.n_iter
730-
731-
732595
class BaseShuffleSplit(with_metaclass(ABCMeta)):
733596
"""Base class for ShuffleSplit and StratifiedShuffleSplit"""
734597

@@ -815,9 +678,6 @@ class ShuffleSplit(BaseShuffleSplit):
815678
TRAIN: [2 1] TEST: [0]
816679
TRAIN: [0 2] TEST: [3]
817680
818-
See also
819-
--------
820-
Bootstrap: cross-validation using re-sampling with replacement.
821681
"""
822682

823683
def _iter_indices(self):

sklearn/tests/test_cross_validation.py

Lines changed: 2 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
from sklearn.utils.testing import assert_not_equal
1717
from sklearn.utils.testing import assert_array_almost_equal
1818
from sklearn.utils.testing import assert_array_equal
19-
from sklearn.utils.testing import assert_warns
2019
from sklearn.utils.testing import assert_warns_message
2120
from sklearn.utils.testing import ignore_warnings
2221
from sklearn.utils.mocking import CheckingClassifier, MockDataFrame
@@ -834,10 +833,8 @@ def test_cross_val_generator_with_indices():
834833
lolo = cval.LeaveOneLabelOut(labels)
835834
lopo = cval.LeavePLabelOut(labels, 2)
836835
ps = cval.PredefinedSplit([1, 1, 2, 2])
837-
# Bootstrap as a cross-validation is deprecated
838-
b = assert_warns(DeprecationWarning, cval.Bootstrap, 2)
839836
ss = cval.ShuffleSplit(2)
840-
for cv in [loo, lpo, kf, skf, lolo, lopo, b, ss, ps]:
837+
for cv in [loo, lpo, kf, skf, lolo, lopo, ss, ps]:
841838
for train, test in cv:
842839
assert_not_equal(np.asarray(train).dtype.kind, 'b')
843840
assert_not_equal(np.asarray(train).dtype.kind, 'b')
@@ -856,36 +853,16 @@ def test_cross_val_generator_with_default_indices():
856853
skf = cval.StratifiedKFold(y, 2)
857854
lolo = cval.LeaveOneLabelOut(labels)
858855
lopo = cval.LeavePLabelOut(labels, 2)
859-
b = cval.Bootstrap(2) # only in index mode
860856
ss = cval.ShuffleSplit(2)
861857
ps = cval.PredefinedSplit([1, 1, 2, 2])
862-
for cv in [loo, lpo, kf, skf, lolo, lopo, b, ss, ps]:
858+
for cv in [loo, lpo, kf, skf, lolo, lopo, ss, ps]:
863859
for train, test in cv:
864860
assert_not_equal(np.asarray(train).dtype.kind, 'b')
865861
assert_not_equal(np.asarray(train).dtype.kind, 'b')
866862
X[train], X[test]
867863
y[train], y[test]
868864

869865

870-
@ignore_warnings
871-
def test_bootstrap_errors():
872-
assert_raises(ValueError, cval.Bootstrap, 10, train_size=100)
873-
assert_raises(ValueError, cval.Bootstrap, 10, test_size=100)
874-
assert_raises(ValueError, cval.Bootstrap, 10, train_size=1.1)
875-
assert_raises(ValueError, cval.Bootstrap, 10, test_size=1.1)
876-
assert_raises(ValueError, cval.Bootstrap, 10, train_size=0.6,
877-
test_size=0.5)
878-
879-
880-
@ignore_warnings
881-
def test_bootstrap_test_sizes():
882-
assert_equal(cval.Bootstrap(10, test_size=0.2).test_size, 2)
883-
assert_equal(cval.Bootstrap(10, test_size=1).test_size, 1)
884-
assert_equal(cval.Bootstrap(10, train_size=1.).train_size, 10)
885-
assert_equal(cval.Bootstrap(10, test_size=2).test_size, 2)
886-
assert_equal(cval.Bootstrap(10, test_size=None).test_size, 5)
887-
888-
889866
def test_shufflesplit_errors():
890867
assert_raises(ValueError, cval.ShuffleSplit, 10, test_size=2.0)
891868
assert_raises(ValueError, cval.ShuffleSplit, 10, test_size=1.0)

sklearn/utils/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ def resample(*arrays, **options):
186186
-------
187187
resampled_arrays : sequence of arrays or scipy.sparse matrices with same \
188188
shape[0]
189-
Sequence of resampled views of the collections. The original arrays are
189+
Sequence of resampled views of the collections. The original arrays are
190190
not impacted.
191191
192192
Examples
@@ -224,7 +224,6 @@ def resample(*arrays, **options):
224224
225225
See also
226226
--------
227-
:class:`sklearn.cross_validation.Bootstrap`
228227
:func:`sklearn.utils.shuffle`
229228
"""
230229
random_state = check_random_state(options.pop('random_state', None))

0 commit comments

Comments
 (0)
0