@@ -28,6 +28,7 @@ def _return_std(X, trees, predictions, min_variance):
     std : array-like, shape=(n_samples,)
         Standard deviation of `y` at `X`. If criterion
         is set to "mse", then `std[i] ~= std(y | X[i])`.
+
     """
     # This derives std(y | x) as described in 4.3.2 of arXiv:1211.0906
     std = np.zeros(len(X))
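For illustration (outside this diff), a minimal sketch of the aggregation `_return_std` performs: per-tree leaf variances and means are combined by the law of total variance, `Var(y|x) = E[Var(y|tree)] + Var(E[y|tree])`, with each tree weighted `1 / len(trees)`. The helper name is hypothetical; it assumes each `tree` is a fitted sklearn `DecisionTreeRegressor` whose leaf impurity under the "mse" criterion equals the leaf variance:

    import numpy as np

    def forest_std_sketch(trees, X, mean_prediction, min_variance=0.0):
        # Accumulate E[Var(y|tree)] + E[E[y|tree]^2] tree by tree.
        total = np.zeros(len(X))
        for tree in trees:
            leaf = tree.apply(X)                    # leaf reached by each sample
            var_tree = tree.tree_.impurity[leaf]    # leaf MSE == Var(y | leaf)
            var_tree = np.maximum(var_tree, min_variance)
            mean_tree = tree.predict(X)             # E[y | tree]
            total += var_tree + mean_tree ** 2
        total /= len(trees)
        total -= mean_prediction ** 2               # subtract (E[y | x])^2
        return np.sqrt(np.maximum(total, 0.0))      # clip tiny negatives, take sqrt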
@@ -68,6 +69,7 @@ class RandomForestRegressor(_sk_RandomForestRegressor):

     max_features : int, float, string or None, optional (default="auto")
         The number of features to consider when looking for the best split:
+
         - If int, then consider `max_features` features at each split.
         - If float, then `max_features` is a percentage and
           `int(max_features * n_features)` features are considered at each
@@ -76,9 +78,12 @@ class RandomForestRegressor(_sk_RandomForestRegressor):
         - If "sqrt", then `max_features=sqrt(n_features)`.
         - If "log2", then `max_features=log2(n_features)`.
         - If None, then `max_features=n_features`.
-        Note: the search for a split does not stop until at least one
-        valid partition of the node samples is found, even if it requires to
-        effectively inspect more than ``max_features`` features.
+
+        .. note::
+            The search for a split does not stop until at least one
+            valid partition of the node samples is found, even if it
+            requires to effectively inspect more than ``max_features``
+            features.

     max_depth : integer or None, optional (default=None)
         The maximum depth of the tree. If None, then nodes are expanded until
@@ -87,13 +92,15 @@ class RandomForestRegressor(_sk_RandomForestRegressor):

     min_samples_split : int, float, optional (default=2)
         The minimum number of samples required to split an internal node:
+
         - If int, then consider `min_samples_split` as the minimum number.
         - If float, then `min_samples_split` is a percentage and
           `ceil(min_samples_split * n_samples)` are the minimum
           number of samples for each split.

     min_samples_leaf : int, float, optional (default=1)
         The minimum number of samples required to be at a leaf node:
+
         - If int, then consider `min_samples_leaf` as the minimum number.
         - If float, then `min_samples_leaf` is a percentage and
           `ceil(min_samples_leaf * n_samples)` are the minimum
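For illustration (outside this diff), the float ("percentage") semantics documented above, with invented values:

    from math import ceil

    n_samples = 250
    # min_samples_split=0.01 -> at least ceil(0.01 * 250) = 3 samples to split a node
    print(ceil(0.01 * n_samples))   # 3
    # min_samples_leaf=0.05 -> at least ceil(0.05 * 250) = 13 samples in each leaf
    print(ceil(0.05 * n_samples))   # 13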
@@ -113,8 +120,10 @@ class RandomForestRegressor(_sk_RandomForestRegressor):
         A node will be split if this split induces a decrease of the impurity
         greater than or equal to this value.
         The weighted impurity decrease equation is the following::
+
             N_t / N * (impurity - N_t_R / N_t * right_impurity
                                 - N_t_L / N_t * left_impurity)
+
         where ``N`` is the total number of samples, ``N_t`` is the number of
         samples at the current node, ``N_t_L`` is the number of samples in the
         left child, and ``N_t_R`` is the number of samples in the right child.
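For illustration (outside this diff), evaluating the weighted impurity decrease above for one hypothetical split, with all numbers invented:

    # N = 100 samples total, N_t = 40 at the node,
    # N_t_L = 30 in the left child, N_t_R = 10 in the right child
    N, N_t, N_t_L, N_t_R = 100, 40, 30, 10
    impurity, left_impurity, right_impurity = 0.5, 0.3, 0.2

    decrease = N_t / N * (impurity
                          - N_t_R / N_t * right_impurity
                          - N_t_L / N_t * left_impurity)
    print(decrease)  # 0.4 * (0.5 - 0.25 * 0.2 - 0.75 * 0.3) = 0.09

The node is split only if this value is at least `min_impurity_decrease`.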
@@ -183,6 +192,7 @@ class RandomForestRegressor(_sk_RandomForestRegressor):
     References
     ----------
     .. [1] L. Breiman, "Random Forests", Machine Learning, 45(1), 5-32, 2001.
+
     """
     def __init__(self, n_estimators=10, criterion='mse', max_depth=None,
                  min_samples_split=2, min_samples_leaf=1,
@@ -224,6 +234,7 @@ def predict(self, X, return_std=False):
         std : array-like of shape=(n_samples,)
             Standard deviation of `y` at `X`. If criterion
             is set to "mse", then `std[i] ~= std(y | X[i])`.
+
         """
         mean = super(RandomForestRegressor, self).predict(X)

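For illustration (outside this diff), a minimal usage sketch of the `return_std` path documented above; the `skopt.learning` import path and the toy data are assumptions, not part of the change:

    import numpy as np
    from skopt.learning import RandomForestRegressor

    rng = np.random.RandomState(0)
    X = rng.uniform(-2, 2, (200, 1))
    y = np.sin(X).ravel() + 0.1 * rng.randn(200)

    model = RandomForestRegressor(n_estimators=100).fit(X, y)
    mean, std = model.predict(X[:5], return_std=True)  # per-sample mean and std(y | x)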
@@ -254,6 +265,7 @@ class ExtraTreesRegressor(_sk_ExtraTreesRegressor):

     max_features : int, float, string or None, optional (default="auto")
         The number of features to consider when looking for the best split:
+
         - If int, then consider `max_features` features at each split.
         - If float, then `max_features` is a percentage and
           `int(max_features * n_features)` features are considered at each
@@ -262,9 +274,12 @@ class ExtraTreesRegressor(_sk_ExtraTreesRegressor):
         - If "sqrt", then `max_features=sqrt(n_features)`.
         - If "log2", then `max_features=log2(n_features)`.
         - If None, then `max_features=n_features`.
-        Note: the search for a split does not stop until at least one
-        valid partition of the node samples is found, even if it requires to
-        effectively inspect more than ``max_features`` features.
+
+        .. note::
+            The search for a split does not stop until at least one
+            valid partition of the node samples is found, even if it
+            requires to effectively inspect more than ``max_features``
+            features.

     max_depth : integer or None, optional (default=None)
         The maximum depth of the tree. If None, then nodes are expanded until
@@ -273,13 +288,15 @@ class ExtraTreesRegressor(_sk_ExtraTreesRegressor):

     min_samples_split : int, float, optional (default=2)
         The minimum number of samples required to split an internal node:
+
         - If int, then consider `min_samples_split` as the minimum number.
         - If float, then `min_samples_split` is a percentage and
           `ceil(min_samples_split * n_samples)` are the minimum
           number of samples for each split.

     min_samples_leaf : int, float, optional (default=1)
         The minimum number of samples required to be at a leaf node:
+
         - If int, then consider `min_samples_leaf` as the minimum number.
         - If float, then `min_samples_leaf` is a percentage and
           `ceil(min_samples_leaf * n_samples)` are the minimum
@@ -299,8 +316,10 @@ class ExtraTreesRegressor(_sk_ExtraTreesRegressor):
         A node will be split if this split induces a decrease of the impurity
         greater than or equal to this value.
         The weighted impurity decrease equation is the following::
+
             N_t / N * (impurity - N_t_R / N_t * right_impurity
                                 - N_t_L / N_t * left_impurity)
+
         where ``N`` is the total number of samples, ``N_t`` is the number of
         samples at the current node, ``N_t_L`` is the number of samples in the
         left child, and ``N_t_R`` is the number of samples in the right child.
@@ -369,6 +388,7 @@ class ExtraTreesRegressor(_sk_ExtraTreesRegressor):
     References
     ----------
     .. [1] L. Breiman, "Random Forests", Machine Learning, 45(1), 5-32, 2001.
+
     """
     def __init__(self, n_estimators=10, criterion='mse', max_depth=None,
                  min_samples_split=2, min_samples_leaf=1,