@@ -635,6 +635,98 @@ def split(self, X, y, labels=None):
635
635
return super (StratifiedKFold , self ).split (X , y , labels )
636
636
637
637
638
class TimeSeriesCV(_BaseKFold):
    """Cross-validator for time-ordered samples.

    Generates train/test indices for data observed at fixed time
    intervals. Within every split the test indices are strictly higher
    than the training indices, which is why this cross-validator never
    shuffles the samples.

    This object is a variation of :class:`KFold`: the ``k``-th split uses
    the first ``k`` folds for training and fold ``k + 1`` for testing.
    As a consequence, and unlike standard cross-validation methods, each
    training set is a superset of the training sets of all earlier splits.

    Read more in the :ref:`User Guide <cross_validation>`.

    Parameters
    ----------
    n_splits : int, default=3
        Number of splits. Must be at least 1.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.model_selection import TimeSeriesCV
    >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
    >>> y = np.array([1, 2, 3, 4])
    >>> tscv = TimeSeriesCV(n_splits=3)
    >>> print(tscv)  # doctest: +NORMALIZE_WHITESPACE
    TimeSeriesCV(n_splits=3)
    >>> for train_index, test_index in tscv.split(X):
    ...    print("TRAIN:", train_index, "TEST:", test_index)
    ...    X_train, X_test = X[train_index], X[test_index]
    ...    y_train, y_test = y[train_index], y[test_index]
    TRAIN: [0] TEST: [1]
    TRAIN: [0 1] TEST: [2]
    TRAIN: [0 1 2] TEST: [3]

    Notes
    -----
    Counting splits from ``i = 1``, the training set of the ``i``-th split
    holds ``i * (n_samples // (n_splits + 1)) + n_samples % (n_splits + 1)``
    samples and every test set holds ``n_samples // (n_splits + 1)``
    samples, where ``n_samples`` is the number of samples. The samples
    left over by the integer division therefore all end up at the start
    of the first training set.
    """

    def __init__(self, n_splits=3):
        # Shuffling would break the temporal ordering, so it is hard-wired
        # off and no random state is forwarded.
        # NOTE(review): the lower bound on n_splits is presumably enforced
        # by _BaseKFold.__init__ (not visible in this chunk) -- confirm it
        # matches the minimum documented above.
        super(TimeSeriesCV, self).__init__(n_splits,
                                           shuffle=False,
                                           random_state=None)

    def split(self, X, y=None, labels=None):
        """Generate indices to split data into training and test set.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.

        y : array-like, shape (n_samples,), optional
            The target variable for supervised learning problems. It is
            passed through ``indexable`` together with ``X`` but is not
            used to compute the splits.

        labels : array-like, with shape (n_samples,), optional
            Group labels for the samples. Like ``y``, it is passed through
            ``indexable`` but is not used to compute the splits.

        Yields
        ------
        train : ndarray
            The training set indices for that split.

        test : ndarray
            The testing set indices for that split.

        Raises
        ------
        ValueError
            If ``n_splits + 1`` exceeds the number of samples. Because this
            method is a generator, the error surfaces when iteration
            starts, not when ``split`` is called.
        """
        X, y, labels = indexable(X, y, labels)
        sample_count = _num_samples(X)
        # n_splits train/test pairs need one extra leading fold that is
        # only ever used for training.
        fold_count = self.n_splits + 1
        if fold_count > sample_count:
            raise ValueError(
                ("Cannot have number of folds ={0} greater"
                 " than the number of samples: {1}.").format(fold_count,
                                                             sample_count))
        fold_size = sample_count // fold_count
        leftover = sample_count % fold_count
        all_indices = np.arange(sample_count)
        # The first test fold starts after one full fold plus the leftover
        # samples; every later test fold starts fold_size further on.
        for start in range(fold_size + leftover, sample_count, fold_size):
            stop = start + fold_size
            yield all_indices[:start], all_indices[start:stop]
728
+
729
+
638
730
class LeaveOneLabelOut (BaseCrossValidator ):
639
731
"""Leave One Label Out cross-validator
640
732
0 commit comments