@@ -732,6 +732,173 @@ def __init__(self, loss, learning_rate, n_estimators, min_samples_split,
 
         self.estimators_ = np.empty((0, 0), dtype=np.object)
 
+    def _fit_stage(self, i, X, y, y_pred, sample_weight, sample_mask,
+                   criterion, splitter, random_state):
+        """Fit another stage of ``n_classes_`` trees to the boosting model."""
+
+        assert sample_mask.dtype == np.bool
+        loss = self.loss_
+        original_y = y
+
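+        # loss.K == 1 for regression and binary classification; for
+        # multiclass classification one regression tree is fit per class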
+        for k in range(loss.K):
+            if loss.is_multi_class:
+                y = np.array(original_y == k, dtype=np.float64)
+
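+            # pseudo-residuals: negative gradient of the loss with
+            # respect to the current predictions y_pred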
+            residual = loss.negative_gradient(y, y_pred, k=k,
+                                              sample_weight=sample_weight)
+
+            # induce regression tree on residuals
+            tree = DecisionTreeRegressor(
+                criterion=criterion,
+                splitter=splitter,
+                max_depth=self.max_depth,
+                min_samples_split=self.min_samples_split,
+                min_samples_leaf=self.min_samples_leaf,
+                min_weight_fraction_leaf=self.min_weight_fraction_leaf,
+                max_features=self.max_features,
+                max_leaf_nodes=self.max_leaf_nodes,
+                random_state=random_state)
+
+            if self.subsample < 1.0:
+                # multiply out-of-place so the caller's sample_weight is
+                # not modified; out-of-bag samples get zero weight
+                sample_weight = sample_weight * sample_mask.astype(np.float64)
+
+            tree.fit(X, residual, sample_weight=sample_weight,
+                     check_input=False)
+
+            # update tree leaves and, in place, add the scaled tree
+            # predictions to y_pred
+            loss.update_terminal_regions(tree.tree_, X, y, residual, y_pred,
+                                         sample_weight, sample_mask,
+                                         self.learning_rate, k=k)
+
+            # add tree to ensemble
+            self.estimators_[i, k] = tree
+
+        return y_pred
+
+    def _check_params(self):
+        """Check validity of parameters and raise ValueError if not valid."""
+        if self.n_estimators <= 0:
+            raise ValueError("n_estimators must be greater than 0 but "
+                             "was %r" % self.n_estimators)
+
+        if self.learning_rate <= 0.0:
+            raise ValueError("learning_rate must be greater than 0 but "
+                             "was %r" % self.learning_rate)
+
+        if (self.loss not in self._SUPPORTED_LOSS
+                or self.loss not in LOSS_FUNCTIONS):
+            raise ValueError("Loss '{0:s}' not supported. ".format(self.loss))
+
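+        # 'deviance' selects binomial or multinomial deviance based on
+        # the number of classes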
+        if self.loss == 'deviance':
+            loss_class = (MultinomialDeviance
+                          if len(self.classes_) > 2
+                          else BinomialDeviance)
+        else:
+            loss_class = LOSS_FUNCTIONS[self.loss]
+
+        if self.loss in ('huber', 'quantile'):
+            self.loss_ = loss_class(self.n_classes_, self.alpha)
+        else:
+            self.loss_ = loss_class(self.n_classes_)
+
+        if not (0.0 < self.subsample <= 1.0):
+            raise ValueError("subsample must be in (0,1] but "
+                             "was %r" % self.subsample)
+
+        if self.init is not None:
+            if isinstance(self.init, six.string_types):
+                if self.init not in INIT_ESTIMATORS:
+                    raise ValueError('init="%s" is not supported' % self.init)
+            else:
+                if (not hasattr(self.init, 'fit')
+                        or not hasattr(self.init, 'predict')):
+                    raise ValueError("init=%r must be valid BaseEstimator "
+                                     "and support both fit and "
+                                     "predict" % self.init)
+
+        if not (0.0 < self.alpha < 1.0):
+            raise ValueError("alpha must be in (0.0, 1.0) but "
+                             "was %r" % self.alpha)
+
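+        # resolve max_features ('auto', 'sqrt', 'log2', None, an int,
+        # or a float fraction of n_features) to the integer max_features_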
+        if isinstance(self.max_features, six.string_types):
+            if self.max_features == "auto":
+                # classification
+                if self.n_classes_ > 1:
+                    max_features = max(1, int(np.sqrt(self.n_features)))
+                else:
+                    # regression
+                    max_features = self.n_features
+            elif self.max_features == "sqrt":
+                max_features = max(1, int(np.sqrt(self.n_features)))
+            elif self.max_features == "log2":
+                max_features = max(1, int(np.log2(self.n_features)))
+            else:
+                raise ValueError("Invalid value for max_features: %r. "
+                                 "Allowed string values are 'auto', 'sqrt' "
+                                 "or 'log2'." % self.max_features)
+        elif self.max_features is None:
+            max_features = self.n_features
+        elif isinstance(self.max_features, (numbers.Integral, np.integer)):
+            max_features = self.max_features
+        else:  # float
+            if 0. < self.max_features <= 1.:
+                max_features = max(int(self.max_features * self.n_features), 1)
+            else:
+                raise ValueError("max_features must be in (0, n_features]")
+
+        self.max_features_ = max_features
+
+    def _init_state(self):
+        """Initialize model state and allocate model state data structures."""
+
+        if self.init is None:
+            self.init_ = self.loss_.init_estimator()
+        elif isinstance(self.init, six.string_types):
+            self.init_ = INIT_ESTIMATORS[self.init]()
+        else:
+            self.init_ = self.init
+
+        self.estimators_ = np.empty((self.n_estimators, self.loss_.K),
+                                    dtype=np.object)
+        self.train_score_ = np.zeros((self.n_estimators,), dtype=np.float64)
+        # track out-of-bag improvement only when subsampling
+        if self.subsample < 1.0:
+            self.oob_improvement_ = np.zeros((self.n_estimators,),
+                                             dtype=np.float64)
+
+    def _clear_state(self):
+        """Clear the state of the gradient boosting model."""
+        if hasattr(self, 'estimators_'):
+            self.estimators_ = np.empty((0, 0), dtype=np.object)
+        if hasattr(self, 'train_score_'):
+            del self.train_score_
+        if hasattr(self, 'oob_improvement_'):
+            del self.oob_improvement_
+        if hasattr(self, 'init_'):
+            del self.init_
+
+    def _resize_state(self):
+        """Grow all per-estimator state arrays to ``n_estimators`` entries."""
+        # self.n_estimators is the new total number of estimators
+        # (not the number of additional ones to fit)
+        total_n_estimators = self.n_estimators
+        if total_n_estimators < self.estimators_.shape[0]:
+            raise ValueError('resize with smaller n_estimators %d < %d' %
+                             (total_n_estimators, self.estimators_.shape[0]))
+
+        self.estimators_.resize((total_n_estimators, self.loss_.K))
+        self.train_score_.resize(total_n_estimators)
+        if (self.subsample < 1 or hasattr(self, 'oob_improvement_')):
+            # if doing OOB, resize the array, or create it if it is
+            # not yet allocated
+            if hasattr(self, 'oob_improvement_'):
+                self.oob_improvement_.resize(total_n_estimators)
+            else:
+                self.oob_improvement_ = np.zeros((total_n_estimators,),
+                                                 dtype=np.float64)
+
+    def _is_initialized(self):
+        return len(getattr(self, 'estimators_', [])) > 0
+
     def fit(self, X, y, sample_weight=None, monitor=None):
         """Fit the gradient boosting model.
@@ -935,173 +1102,6 @@ def _fit_stages(self, X, y, y_pred, sample_weight, random_state,
                     break
         return i + 1
 
-    def _fit_stage(self, i, X, y, y_pred, sample_weight, sample_mask,
-                   criterion, splitter, random_state):
-        """Fit another stage of ``n_classes_`` trees to the boosting model."""
-
-        assert sample_mask.dtype == np.bool
-        loss = self.loss_
-        original_y = y
-
-        for k in range(loss.K):
-            if loss.is_multi_class:
-                y = np.array(original_y == k, dtype=np.float64)
-
-            residual = loss.negative_gradient(y, y_pred, k=k,
-                                              sample_weight=sample_weight)
-
-            # induce regression tree on residuals
-            tree = DecisionTreeRegressor(
-                criterion=criterion,
-                splitter=splitter,
-                max_depth=self.max_depth,
-                min_samples_split=self.min_samples_split,
-                min_samples_leaf=self.min_samples_leaf,
-                min_weight_fraction_leaf=self.min_weight_fraction_leaf,
-                max_features=self.max_features,
-                max_leaf_nodes=self.max_leaf_nodes,
-                random_state=random_state)
-
-            if self.subsample < 1.0:
-                # no inplace multiplication!
-                sample_weight = sample_weight * sample_mask.astype(np.float64)
-
-            tree.fit(X, residual, sample_weight=sample_weight,
-                     check_input=False)
-
-            # update tree leaves
-            loss.update_terminal_regions(tree.tree_, X, y, residual, y_pred,
-                                         sample_weight, sample_mask,
-                                         self.learning_rate, k=k)
-
-            # add tree to ensemble
-            self.estimators_[i, k] = tree
-
-        return y_pred
-
-    def _check_params(self):
-        """Check validity of parameters and raise ValueError if not valid."""
-        if self.n_estimators <= 0:
-            raise ValueError("n_estimators must be greater than 0 but "
-                             "was %r" % self.n_estimators)
-
-        if self.learning_rate <= 0.0:
-            raise ValueError("learning_rate must be greater than 0 but "
-                             "was %r" % self.learning_rate)
-
-        if (self.loss not in self._SUPPORTED_LOSS
-                or self.loss not in LOSS_FUNCTIONS):
-            raise ValueError("Loss '{0:s}' not supported. ".format(self.loss))
-
-        if self.loss == 'deviance':
-            loss_class = (MultinomialDeviance
-                          if len(self.classes_) > 2
-                          else BinomialDeviance)
-        else:
-            loss_class = LOSS_FUNCTIONS[self.loss]
-
-        if self.loss in ('huber', 'quantile'):
-            self.loss_ = loss_class(self.n_classes_, self.alpha)
-        else:
-            self.loss_ = loss_class(self.n_classes_)
-
-        if not (0.0 < self.subsample <= 1.0):
-            raise ValueError("subsample must be in (0,1] but "
-                             "was %r" % self.subsample)
-
-        if self.init is not None:
-            if isinstance(self.init, six.string_types):
-                if self.init not in INIT_ESTIMATORS:
-                    raise ValueError('init="%s" is not supported' % self.init)
-            else:
-                if (not hasattr(self.init, 'fit')
-                        or not hasattr(self.init, 'predict')):
-                    raise ValueError("init=%r must be valid BaseEstimator "
-                                     "and support both fit and "
-                                     "predict" % self.init)
-
-        if not (0.0 < self.alpha < 1.0):
-            raise ValueError("alpha must be in (0.0, 1.0) but "
-                             "was %r" % self.alpha)
-
-        if isinstance(self.max_features, six.string_types):
-            if self.max_features == "auto":
-                # if is_classification
-                if self.n_classes_ > 1:
-                    max_features = max(1, int(np.sqrt(self.n_features)))
-                else:
-                    # is regression
-                    max_features = self.n_features
-            elif self.max_features == "sqrt":
-                max_features = max(1, int(np.sqrt(self.n_features)))
-            elif self.max_features == "log2":
-                max_features = max(1, int(np.log2(self.n_features)))
-            else:
-                raise ValueError("Invalid value for max_features: %r. "
-                                 "Allowed string values are 'auto', 'sqrt' "
-                                 "or 'log2'." % self.max_features)
-        elif self.max_features is None:
-            max_features = self.n_features
-        elif isinstance(self.max_features, (numbers.Integral, np.integer)):
-            max_features = self.max_features
-        else:  # float
-            if 0. < self.max_features <= 1.:
-                max_features = max(int(self.max_features * self.n_features), 1)
-            else:
-                raise ValueError("max_features must be in (0, n_features]")
-
-        self.max_features_ = max_features
-
-    def _init_state(self):
-        """Initialize model state and allocate model state data structures."""
-
-        if self.init is None:
-            self.init_ = self.loss_.init_estimator()
-        elif isinstance(self.init, six.string_types):
-            self.init_ = INIT_ESTIMATORS[self.init]()
-        else:
-            self.init_ = self.init
-
-        self.estimators_ = np.empty((self.n_estimators, self.loss_.K),
-                                    dtype=np.object)
-        self.train_score_ = np.zeros((self.n_estimators,), dtype=np.float64)
-        # do oob?
-        if self.subsample < 1.0:
-            self.oob_improvement_ = np.zeros((self.n_estimators),
-                                             dtype=np.float64)
-
-    def _clear_state(self):
-        """Clear the state of the gradient boosting model."""
-        if hasattr(self, 'estimators_'):
-            self.estimators_ = np.empty((0, 0), dtype=np.object)
-        if hasattr(self, 'train_score_'):
-            del self.train_score_
-        if hasattr(self, 'oob_improvement_'):
-            del self.oob_improvement_
-        if hasattr(self, 'init_'):
-            del self.init_
-
-    def _resize_state(self):
-        """Add additional ``n_estimators`` entries to all attributes."""
-        # self.n_estimators is the number of additional est to fit
-        total_n_estimators = self.n_estimators
-        if total_n_estimators < self.estimators_.shape[0]:
-            raise ValueError('resize with smaller n_estimators %d < %d' %
-                             (total_n_estimators, self.estimators_[0]))
-
-        self.estimators_.resize((total_n_estimators, self.loss_.K))
-        self.train_score_.resize(total_n_estimators)
-        if (self.subsample < 1 or hasattr(self, 'oob_improvement_')):
-            # if do oob resize arrays or create new if not available
-            if hasattr(self, 'oob_improvement_'):
-                self.oob_improvement_.resize(total_n_estimators)
-            else:
-                self.oob_improvement_ = np.zeros((total_n_estimators,),
-                                                 dtype=np.float64)
-
-    def _is_initialized(self):
-        return len(getattr(self, 'estimators_', [])) > 0
-
     def _make_estimator(self, append=True):
         # we don't need _make_estimator
         raise NotImplementedError()