50
50
from .utils ._tags import _safe_tags
51
51
from .utils .validation import _num_samples
52
52
from .utils .validation import check_is_fitted
53
- from .utils .validation import check_X_y , check_array
53
+ from .utils .validation import column_or_1d
54
+ from .utils .validation import _assert_all_finite
54
55
from .utils .multiclass import (_check_partial_fit_first_call ,
55
56
check_classification_targets ,
56
57
_ovr_decision_function )
57
58
from .utils .metaestimators import _safe_split , if_delegate_has_method
58
59
from .utils .fixes import delayed
59
- from .exceptions import NotFittedError
60
60
61
61
from joblib import Parallel
62
62
@@ -114,24 +114,28 @@ def _check_estimator(estimator):
114
114
class _ConstantPredictor(BaseEstimator):
    """Helper estimator that always predicts the single constant seen at fit.

    Used by the one-vs-rest code path when a binary sub-problem degenerates
    to a single class: every prediction is the stored target ``self.y_``
    repeated once per input sample.
    """

    def fit(self, X, y):
        """Store the constant target ``y`` and record the feature count.

        ``reset=True`` is correct here: fit is where ``n_features_in_``
        is established.
        """
        self._check_n_features(X, reset=True)
        self.y_ = y
        return self

    def predict(self, X):
        """Return ``self.y_`` repeated once per sample in ``X``."""
        check_is_fitted(self)
        # reset must be False at predict time: reset=True would silently
        # overwrite n_features_in_ instead of validating X against the
        # number of features seen during fit.
        self._check_n_features(X, reset=False)
        return np.repeat(self.y_, _num_samples(X))

    def decision_function(self, X):
        """Return the constant decision value once per sample in ``X``."""
        check_is_fitted(self)
        # Validate (do not reset) the feature count learned at fit time.
        self._check_n_features(X, reset=False)
        return np.repeat(self.y_, _num_samples(X))

    def predict_proba(self, X):
        """Return ``[1 - y_, y_]`` probabilities once per sample in ``X``."""
        check_is_fitted(self)
        # Validate (do not reset) the feature count learned at fit time.
        self._check_n_features(X, reset=False)
        return np.repeat([np.hstack([1 - self.y_, self.y_])],
                         _num_samples(X), axis=0)
class OneVsRestClassifier (MultiOutputMixin , ClassifierMixin ,
@@ -219,6 +223,12 @@ class OneVsRestClassifier(MultiOutputMixin, ClassifierMixin,
219
223
multilabel_ : boolean
220
224
Whether a OneVsRestClassifier is a multilabel classifier.
221
225
226
+ n_features_in_ : int
227
+ Number of features seen during :term:`fit`. Only defined if the
228
+ underlying estimator exposes such an attribute when fit.
229
+
230
+ .. versionadded:: 0.24
231
+
222
232
Examples
223
233
--------
224
234
>>> import numpy as np
@@ -282,6 +292,9 @@ def fit(self, X, y):
282
292
self .label_binarizer_ .classes_ [i ]])
283
293
for i , column in enumerate (columns ))
284
294
295
+ if hasattr (self .estimators_ [0 ], "n_features_in_" ):
296
+ self .n_features_in_ = self .estimators_ [0 ].n_features_in_
297
+
285
298
return self
286
299
287
300
@if_delegate_has_method ('estimator' )
@@ -338,6 +351,9 @@ def partial_fit(self, X, y, classes=None):
338
351
delayed (_partial_fit_binary )(estimator , X , column )
339
352
for estimator , column in zip (self .estimators_ , columns ))
340
353
354
+ if hasattr (self .estimators_ [0 ], "n_features_in_" ):
355
+ self .n_features_in_ = self .estimators_ [0 ].n_features_in_
356
+
341
357
return self
342
358
343
359
def predict (self , X ):
@@ -504,19 +520,6 @@ def _more_tags(self):
504
520
def _first_estimator (self ):
505
521
return self .estimators_ [0 ]
506
522
507
- @property
508
- def n_features_in_ (self ):
509
- # For consistency with other estimators we raise a AttributeError so
510
- # that hasattr() fails if the OVR estimator isn't fitted.
511
- try :
512
- check_is_fitted (self )
513
- except NotFittedError as nfe :
514
- raise AttributeError (
515
- "{} object has no n_features_in_ attribute."
516
- .format (self .__class__ .__name__ )
517
- ) from nfe
518
- return self .estimators_ [0 ].n_features_in_
519
-
520
523
521
524
def _fit_ovo_binary (estimator , X , y , i , j ):
522
525
"""Fit a single binary estimator (one-vs-one)."""
@@ -525,7 +528,7 @@ def _fit_ovo_binary(estimator, X, y, i, j):
525
528
y_binary = np .empty (y .shape , int )
526
529
y_binary [y == i ] = 0
527
530
y_binary [y == j ] = 1
528
- indcond = np .arange (X . shape [ 0 ] )[cond ]
531
+ indcond = np .arange (_num_samples ( X ) )[cond ]
529
532
return _fit_binary (estimator ,
530
533
_safe_split (estimator , X , None , indices = indcond )[0 ],
531
534
y_binary , classes = [i , j ]), indcond
@@ -593,6 +596,12 @@ class OneVsOneClassifier(MetaEstimatorMixin, ClassifierMixin, BaseEstimator):
593
596
(renaming of 0.25) and onward, `pairwise_indices_` will use the
594
597
pairwise estimator tag instead.
595
598
599
+ n_features_in_ : int
600
+ Number of features seen during :term:`fit`. Only defined if the
601
+ underlying estimator exposes such an attribute when fit.
602
+
603
+ .. versionadded:: 0.24
604
+
596
605
Examples
597
606
--------
598
607
>>> from sklearn.datasets import load_iris
@@ -626,6 +635,7 @@ def fit(self, X, y):
626
635
-------
627
636
self
628
637
"""
638
+ # We need to validate the data because we do a safe_indexing later.
629
639
X , y = self ._validate_data (X , y , accept_sparse = ['csr' , 'csc' ],
630
640
force_all_finite = False )
631
641
check_classification_targets (y )
@@ -642,6 +652,9 @@ def fit(self, X, y):
642
652
643
653
self .estimators_ = estimators_indices [0 ]
644
654
655
+ if hasattr (self .estimators_ [0 ], "n_features_in_" ):
656
+ self .n_features_in_ = self .estimators_ [0 ].n_features_in_
657
+
645
658
pairwise = _is_pairwise (self )
646
659
self .pairwise_indices_ = (
647
660
estimators_indices [1 ] if pairwise else None )
@@ -686,8 +699,9 @@ def partial_fit(self, X, y, classes=None):
686
699
"must be subset of {1}" .format (np .unique (y ),
687
700
self .classes_ ))
688
701
689
- X , y = check_X_y (X , y , accept_sparse = ['csr' , 'csc' ],
690
- force_all_finite = False )
702
+ X , y = self ._validate_data (
703
+ X , y , accept_sparse = ['csr' , 'csc' ], force_all_finite = False ,
704
+ reset = _check_partial_fit_first_call (self , classes ))
691
705
check_classification_targets (y )
692
706
combinations = itertools .combinations (range (self .n_classes_ ), 2 )
693
707
self .estimators_ = Parallel (
@@ -699,6 +713,9 @@ def partial_fit(self, X, y, classes=None):
699
713
700
714
self .pairwise_indices_ = None
701
715
716
+ if hasattr (self .estimators_ [0 ], "n_features_in_" ):
717
+ self .n_features_in_ = self .estimators_ [0 ].n_features_in_
718
+
702
719
return self
703
720
704
721
def predict (self , X ):
@@ -832,6 +849,12 @@ class OutputCodeClassifier(MetaEstimatorMixin, ClassifierMixin, BaseEstimator):
832
849
code_book_ : numpy array of shape [n_classes, code_size]
833
850
Binary array containing the code of each class.
834
851
852
+ n_features_in_ : int
853
+ Number of features seen during :term:`fit`. Only defined if the
854
+ underlying estimator exposes such an attribute when fit.
855
+
856
+ .. versionadded:: 0.24
857
+
835
858
Examples
836
859
--------
837
860
>>> from sklearn.multiclass import OutputCodeClassifier
@@ -886,7 +909,9 @@ def fit(self, X, y):
886
909
-------
887
910
self
888
911
"""
889
- X , y = self ._validate_data (X , y , accept_sparse = True )
912
+ y = column_or_1d (y , warn = True )
913
+ _assert_all_finite (y )
914
+
890
915
if self .code_size <= 0 :
891
916
raise ValueError ("code_size should be greater than 0, got {0}"
892
917
"" .format (self .code_size ))
@@ -897,6 +922,9 @@ def fit(self, X, y):
897
922
898
923
self .classes_ = np .unique (y )
899
924
n_classes = self .classes_ .shape [0 ]
925
+ if n_classes == 0 :
926
+ raise ValueError ("OutputCodeClassifier can not be fit when no "
927
+ "class is present." )
900
928
code_size_ = int (n_classes * self .code_size )
901
929
902
930
# FIXME: there are more elaborate methods than generating the codebook
@@ -912,12 +940,15 @@ def fit(self, X, y):
912
940
classes_index = {c : i for i , c in enumerate (self .classes_ )}
913
941
914
942
Y = np .array ([self .code_book_ [classes_index [y [i ]]]
915
- for i in range (X . shape [ 0 ] )], dtype = int )
943
+ for i in range (_num_samples ( y ) )], dtype = int )
916
944
917
945
self .estimators_ = Parallel (n_jobs = self .n_jobs )(
918
946
delayed (_fit_binary )(self .estimator , X , Y [:, i ])
919
947
for i in range (Y .shape [1 ]))
920
948
949
+ if hasattr (self .estimators_ [0 ], "n_features_in_" ):
950
+ self .n_features_in_ = self .estimators_ [0 ].n_features_in_
951
+
921
952
return self
922
953
923
954
def predict (self , X ):
@@ -934,7 +965,6 @@ def predict(self, X):
934
965
Predicted multi-class targets.
935
966
"""
936
967
check_is_fitted (self )
937
- X = check_array (X , accept_sparse = True )
938
968
Y = np .array ([_predict_binary (e , X ) for e in self .estimators_ ]).T
939
969
pred = euclidean_distances (Y , self .code_book_ ).argmin (axis = 1 )
940
970
return self .classes_ [pred ]
0 commit comments