Merge pull request #59 from scikit-optimize/master · scikit-optimize/scikit-optimize@c8ba0e8 · GitHub

This repository was archived by the owner on Feb 28, 2024. It is now read-only.

Commit c8ba0e8

Merge pull request #59 from scikit-optimize/master
merge upstream
2 parents 07dd020 + fa96058 commit c8ba0e8

9 files changed: +236 -229 lines changed

skopt/learning/forest.py

Lines changed: 26 additions & 6 deletions
@@ -28,6 +28,7 @@ def _return_std(X, trees, predictions, min_variance):
     std : array-like, shape=(n_samples,)
         Standard deviation of `y` at `X`. If criterion
         is set to "mse", then `std[i] ~= std(y | X[i])`.
+
     """
     # This derives std(y | x) as described in 4.3.2 of arXiv:1211.0906
     std = np.zeros(len(X))
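
The derivation `_return_std` cites (section 4.3.2 of arXiv:1211.0906) is a law-of-total-variance estimate across the trees of the forest. A minimal sketch of that computation, using `forest_std`, `tree_means` and `tree_vars` as illustrative names rather than skopt API:

import numpy as np

def forest_std(tree_means, tree_vars, min_variance=0.0):
    # tree_means, tree_vars: arrays of shape (n_trees, n_samples) holding
    # each tree's predicted mean and variance for every sample.
    tree_vars = np.maximum(tree_vars, min_variance)
    mean = np.mean(tree_means, axis=0)
    # Var[y|x] = E[var_tree + mean_tree^2] - (E[mean_tree])^2
    second_moment = np.mean(tree_vars + tree_means ** 2, axis=0)
    return np.sqrt(np.maximum(second_moment - mean ** 2, 0.0))
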
@@ -68,6 +69,7 @@ class RandomForestRegressor(_sk_RandomForestRegressor):
 
     max_features : int, float, string or None, optional (default="auto")
         The number of features to consider when looking for the best split:
+
         - If int, then consider `max_features` features at each split.
         - If float, then `max_features` is a percentage and
           `int(max_features * n_features)` features are considered at each
@@ -76,9 +78,12 @@ class RandomForestRegressor(_sk_RandomForestRegressor):
         - If "sqrt", then `max_features=sqrt(n_features)`.
         - If "log2", then `max_features=log2(n_features)`.
         - If None, then `max_features=n_features`.
-        Note: the search for a split does not stop until at least one
-        valid partition of the node samples is found, even if it requires to
-        effectively inspect more than ``max_features`` features.
+
+        .. note::
+            The search for a split does not stop until at least one
+            valid partition of the node samples is found, even if it
+            requires to effectively inspect more than ``max_features``
+            features.
 
     max_depth : integer or None, optional (default=None)
         The maximum depth of the tree. If None, then nodes are expanded until
@@ -87,13 +92,15 @@ class RandomForestRegressor(_sk_RandomForestRegressor):
 
     min_samples_split : int, float, optional (default=2)
         The minimum number of samples required to split an internal node:
+
         - If int, then consider `min_samples_split` as the minimum number.
         - If float, then `min_samples_split` is a percentage and
           `ceil(min_samples_split * n_samples)` are the minimum
           number of samples for each split.
 
     min_samples_leaf : int, float, optional (default=1)
         The minimum number of samples required to be at a leaf node:
+
         - If int, then consider `min_samples_leaf` as the minimum number.
         - If float, then `min_samples_leaf` is a percentage and
           `ceil(min_samples_leaf * n_samples)` are the minimum
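
The float semantics documented above for `max_features`, `min_samples_split` and `min_samples_leaf` reduce to simple arithmetic; a worked illustration with assumed sizes (not taken from the diff):

from math import ceil

n_features, n_samples = 10, 250

# Floats are interpreted as fractions of the totals:
max_features = int(0.5 * n_features)        # 5 features considered per split
min_samples_split = ceil(0.01 * n_samples)  # 3 samples needed to split a node
min_samples_leaf = ceil(0.004 * n_samples)  # 1 sample required per leaf
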
@@ -113,8 +120,10 @@ class RandomForestRegressor(_sk_RandomForestRegressor):
         A node will be split if this split induces a decrease of the impurity
         greater than or equal to this value.
         The weighted impurity decrease equation is the following::
+
             N_t / N * (impurity - N_t_R / N_t * right_impurity
                        - N_t_L / N_t * left_impurity)
+
         where ``N`` is the total number of samples, ``N_t`` is the number of
         samples at the current node, ``N_t_L`` is the number of samples in the
         left child, and ``N_t_R`` is the number of samples in the right child.
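
Plugging assumed node counts into the weighted impurity decrease equation above makes the weighting concrete:

# Hypothetical node: 100 samples total, 40 at the node,
# 30 going to the left child and 10 to the right.
N, N_t, N_t_L, N_t_R = 100, 40, 30, 10
impurity, left_impurity, right_impurity = 0.5, 0.3, 0.2

decrease = N_t / N * (impurity
                      - N_t_R / N_t * right_impurity
                      - N_t_L / N_t * left_impurity)
print(decrease)  # 0.4 * (0.5 - 0.25*0.2 - 0.75*0.3) = 0.09
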
@@ -183,6 +192,7 @@ class RandomForestRegressor(_sk_RandomForestRegressor):
     References
     ----------
     .. [1] L. Breiman, "Random Forests", Machine Learning, 45(1), 5-32, 2001.
+
     """
     def __init__(self, n_estimators=10, criterion='mse', max_depth=None,
                  min_samples_split=2, min_samples_leaf=1,
@@ -224,6 +234,7 @@ def predict(self, X, return_std=False):
         std : array-like of shape=(n_samples,)
             Standard deviation of `y` at `X`. If criterion
             is set to "mse", then `std[i] ~= std(y | X[i])`.
+
         """
         mean = super(RandomForestRegressor, self).predict(X)
 
@@ -254,6 +265,7 @@ class ExtraTreesRegressor(_sk_ExtraTreesRegressor):
 
     max_features : int, float, string or None, optional (default="auto")
         The number of features to consider when looking for the best split:
+
         - If int, then consider `max_features` features at each split.
         - If float, then `max_features` is a percentage and
           `int(max_features * n_features)` features are considered at each
@@ -262,9 +274,12 @@ class ExtraTreesRegressor(_sk_ExtraTreesRegressor):
         - If "sqrt", then `max_features=sqrt(n_features)`.
         - If "log2", then `max_features=log2(n_features)`.
         - If None, then `max_features=n_features`.
-        Note: the search for a split does not stop until at least one
-        valid partition of the node samples is found, even if it requires to
-        effectively inspect more than ``max_features`` features.
+
+        .. note::
+            The search for a split does not stop until at least one
+            valid partition of the node samples is found, even if it
+            requires to effectively inspect more than ``max_features``
+            features.
 
     max_depth : integer or None, optional (default=None)
         The maximum depth of the tree. If None, then nodes are expanded until
@@ -273,13 +288,15 @@ class ExtraTreesRegressor(_sk_ExtraTreesRegressor):
 
     min_samples_split : int, float, optional (default=2)
         The minimum number of samples required to split an internal node:
+
         - If int, then consider `min_samples_split` as the minimum number.
         - If float, then `min_samples_split` is a percentage and
           `ceil(min_samples_split * n_samples)` are the minimum
           number of samples for each split.
 
     min_samples_leaf : int, float, optional (default=1)
         The minimum number of samples required to be at a leaf node:
+
         - If int, then consider `min_samples_leaf` as the minimum number.
         - If float, then `min_samples_leaf` is a percentage and
           `ceil(min_samples_leaf * n_samples)` are the minimum
@@ -299,8 +316,10 @@ class ExtraTreesRegressor(_sk_ExtraTreesRegressor):
         A node will be split if this split induces a decrease of the impurity
         greater than or equal to this value.
         The weighted impurity decrease equation is the following::
+
             N_t / N * (impurity - N_t_R / N_t * right_impurity
                        - N_t_L / N_t * left_impurity)
+
         where ``N`` is the total number of samples, ``N_t`` is the number of
         samples at the current node, ``N_t_L`` is the number of samples in the
         left child, and ``N_t_R`` is the number of samples in the right child.
@@ -369,6 +388,7 @@ class ExtraTreesRegressor(_sk_ExtraTreesRegressor):
     References
     ----------
     .. [1] L. Breiman, "Random Forests", Machine Learning, 45(1), 5-32, 2001.
+
     """
     def __init__(self, n_estimators=10, criterion='mse', max_depth=None,
                  min_samples_split=2, min_samples_leaf=1,
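
A short usage sketch of the `return_std` behaviour documented for both regressors in this file, on toy data (illustrative, not part of the diff; exact numbers will vary):

import numpy as np
from skopt.learning import RandomForestRegressor

rng = np.random.RandomState(0)
X = rng.uniform(-2, 2, size=(200, 1))
y = np.sin(X).ravel() + rng.normal(scale=0.1, size=200)

model = RandomForestRegressor(n_estimators=10, random_state=0).fit(X, y)

# The skopt subclass returns std(y | X[i]) alongside the mean prediction.
mean, std = model.predict(X[:5], return_std=True)
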

skopt/optimizer/base.py

Lines changed: 36 additions & 40 deletions
@@ -30,6 +30,7 @@ def base_minimize(func, dimensions, base_estimator,
                   callback=None, n_points=10000, n_restarts_optimizer=5,
                   xi=0.01, kappa=1.96, n_jobs=1, model_queue_size=None):
     """Base optimizer class
+
     Parameters
     ----------
     func : callable
@@ -53,42 +54,43 @@ def base_minimize(func, dimensions, base_estimator,
         - an instance of a `Dimension` object (`Real`, `Integer` or
           `Categorical`).
 
-        NOTE: The upper and lower bounds are inclusive for `Integer`
-        dimensions.
+        .. note:: The upper and lower bounds are inclusive for `Integer`
+            dimensions.
 
     base_estimator : sklearn regressor
         Should inherit from `sklearn.base.RegressorMixin`.
         In addition, should have an optional `return_std` argument,
-        which returns `std(Y | x)`` along with `E[Y | x]`.
+        which returns `std(Y | x)` along with `E[Y | x]`.
 
-    n_calls : int, default=100
-        Maximum number of calls to `func`. An objective fucntion will
+    n_calls : int, default: 100
+        Maximum number of calls to `func`. An objective function will
         always be evaluated this number of times; Various options to
         supply initialization points do not affect this value.
 
-    n_random_starts : int, default=10
+    n_random_starts : int, default: None
         Number of evaluations of `func` with random points before
         approximating it with `base_estimator`.
-        .. deprecated:: 0.9
+
+        .. deprecated:: 0.8
            use `n_initial_points` instead.
 
-    n_initial_points : int, default=10
+    n_initial_points : int, default: 10
         Number of evaluations of `func` with initialization points
         before approximating it with `base_estimator`. Initial point
         generator can be changed by setting `initial_point_generator`.
 
     initial_point_generator : str, InitialPointGenerator instance, \
-            default='random'
+            default: `"random"`
         Sets a initial points generator. Can be either
 
-        - "random" for uniform random numbers,
-        - "sobol" for a Sobol sequence,
-        - "halton" for a Halton sequence,
-        - "hammersly" for a Hammersly sequence,
-        - "lhs" for a latin hypercube sequence,
-        - "grid" for a uniform grid sequence
+        - `"random"` for uniform random numbers,
+        - `"sobol"` for a Sobol sequence,
+        - `"halton"` for a Halton sequence,
+        - `"hammersly"` for a Hammersly sequence,
+        - `"lhs"` for a latin hypercube sequence,
+        - `"grid"` for a uniform grid sequence
 
-    acq_func : string, default=`"EI"`
+    acq_func : string, default: `"EI"`
         Function to minimize over the posterior distribution. Can be either
 
         - `"LCB"` for lower confidence bound,
@@ -100,22 +102,23 @@ def base_minimize(func, dimensions, base_estimator,
           the second being the time taken in seconds.
         - `"PIps"` for negated probability of improvement per second. The
           return type of the objective function is assumed to be similar to
-          that of `"EIps
+          that of `"EIps"`
 
-    acq_optimizer : string, `"sampling"` or `"lbfgs"`, default=`"lbfgs"`
-        Method to minimize the acquistion function. The fit model
+    acq_optimizer : string, `"sampling"` or `"lbfgs"`, default: `"lbfgs"`
+        Method to minimize the acquisition function. The fit model
         is updated with the optimal value obtained by optimizing `acq_func`
         with `acq_optimizer`.
 
         - If set to `"sampling"`, then `acq_func` is optimized by computing
          `acq_func` at `n_points` randomly sampled points and the smallest
          value found is used.
        - If set to `"lbfgs"`, then
-          - The `n_restarts_optimizer` no. of points which the acquisition
-            function is least are taken as start points.
-          - `"lbfgs"` is run for 20 iterations with these points as initial
-            points to find local minima.
-          - The optimal of these local minima is used to update the prior.
+
+          - The `n_restarts_optimizer` no. of points which the acquisition
+            function is least are taken as start points.
+          - `"lbfgs"` is run for 20 iterations with these points as initial
+            points to find local minima.
+          - The optimal of these local minima is used to update the prior.
 
     x0 : list, list of lists or `None`
         Initial input points.
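
The `"sampling"` strategy described above amounts to evaluating the acquisition at `n_points` random candidates and keeping the argmin. A minimal sketch, where `acq_func` and `space_rvs` are illustrative stand-ins rather than skopt API:

import numpy as np

def optimize_acq_by_sampling(acq_func, space_rvs, n_points=10000, seed=None):
    rng = np.random.RandomState(seed)
    candidates = space_rvs(n_points, rng)   # draw n_points random points
    values = acq_func(candidates)           # acquisition value per candidate
    return candidates[np.argmin(values)]    # smallest value found is used
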
@@ -146,41 +149,41 @@ def base_minimize(func, dimensions, base_estimator,
         Set random state to something other than None for reproducible
         results.
 
-    verbose : boolean, default=False
+    verbose : boolean, default: False
         Control the verbosity. It is advised to set the verbosity to True
         for long optimization runs.
 
     callback : callable, list of callables, optional
         If callable then `callback(res)` is called after each call to `func`.
         If list of callables, then each callable in the list is called.
 
-    n_points : int, default=10000
+    n_points : int, default: 10000
         If `acq_optimizer` is set to `"sampling"`, then `acq_func` is
         optimized by computing `acq_func` at `n_points` randomly sampled
         points.
 
-    n_restarts_optimizer : int, default=5
+    n_restarts_optimizer : int, default: 5
         The number of restarts of the optimizer when `acq_optimizer`
         is `"lbfgs"`.
 
-    xi : float, default=0.01
+    xi : float, default: 0.01
         Controls how much improvement one wants over the previous best
         values. Used when the acquisition is either `"EI"` or `"PI"`.
 
-    kappa : float, default=1.96
+    kappa : float, default: 1.96
         Controls how much of the variance in the predicted values should be
         taken into account. If set to be very high, then we are favouring
         exploration over exploitation and vice versa.
         Used when the acquisition is `"LCB"`.
 
-    n_jobs : int, default=1
+    n_jobs : int, default: 1
         Number of cores to run in parallel while running the lbfgs
         optimizations over the acquisition function. Valid only when
         `acq_optimizer` is set to "lbfgs."
         Defaults to 1 core. If `n_jobs=-1`, then number of jobs is set
         to number of cores.
 
-    model_queue_size : int or None, default=None
+    model_queue_size : int or None, default: None
         Keeps list of models only as long as the argument given. In the
         case of None, the list has no capped length.
 
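
How `kappa` trades exploration against exploitation is easiest to see in the standard lower confidence bound formula, LCB(x) = mu(x) - kappa * sigma(x) (a textbook form, not quoted from this diff):

import numpy as np

def lower_confidence_bound(mu, sigma, kappa=1.96):
    # Larger kappa weights the predictive uncertainty sigma more heavily,
    # favouring exploration; smaller kappa favours exploiting low mu.
    return mu - kappa * sigma

mu = np.array([0.2, 0.3])
sigma = np.array([0.05, 0.40])
print(lower_confidence_bound(mu, sigma))  # [0.102, -0.484]: the uncertain point wins
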
@@ -191,22 +194,15 @@ def base_minimize(func, dimensions, base_estimator,
         Important attributes are:
 
         - `x` [list]: location of the minimum.
-
         - `fun` [float]: function value at the minimum.
-
         - `models`: surrogate models used for each iteration.
-
         - `x_iters` [list of lists]: location of function evaluation for each
-            iteration.
-
+          iteration.
         - `func_vals` [array]: function value for each iteration.
-
         - `space` [Space]: the optimization space.
-
         - `specs` [dict]`: the call specifications.
-
         - `rng` [RandomState instance]: State of the random state
-            at the end of minimization.
+          at the end of minimization.
 
         For more details related to the OptimizeResult object, refer
         http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html
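
The attributes listed above can be read off any of the concrete minimizers built on `base_minimize`; a small sketch with `skopt.forest_minimize` on a toy objective (assumed here for illustration):

from skopt import forest_minimize

def objective(x):
    return (x[0] - 0.3) ** 2 + (x[1] + 0.1) ** 2

res = forest_minimize(objective,
                      dimensions=[(-1.0, 1.0), (-1.0, 1.0)],
                      n_calls=20, random_state=0)

print(res.x, res.fun)                        # best point and its value
print(len(res.x_iters), len(res.func_vals))  # one entry per call: 20 20
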

skopt/optimizer/dummy.py

Lines changed: 15 additions & 18 deletions
@@ -32,19 +32,19 @@ def dummy_minimize(func, dimensions, n_calls=100,
         - an instance of a `Dimension` object (`Real`, `Integer` or
           `Categorical`).
 
-    n_calls : int, default=100
+    n_calls : int, default: 100
         Number of calls to `func` to find the minimum.
 
     initial_point_generator : str, InitialPointGenerator instance, \
-            default='random'
+            default: `"random"`
         Sets a initial points generator. Can be either
 
-        - "random" for uniform random numbers,
-        - "sobol" for a Sobol sequence,
-        - "halton" for a Halton sequence,
-        - "hammersly" for a Hammersly sequence,
-        - "lhs" for a latin hypercube sequence,
-        - "grid" for a uniform grid sequence
+        - `"random"` for uniform random numbers,
+        - `"sobol"` for a Sobol sequence,
+        - `"halton"` for a Halton sequence,
+        - `"hammersly"` for a Hammersly sequence,
+        - `"lhs"` for a latin hypercube sequence,
+        - `"grid"` for a uniform grid sequence
 
     x0 : list, list of lists or `None`
         Initial input points.
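
A small sketch of `dummy_minimize` with one of the generators listed above (toy objective, assumed here for illustration):

from skopt import dummy_minimize

def objective(x):
    return (x[0] - 0.3) ** 2

# Pure random search over one real dimension, drawing the points
# from a latin hypercube sequence instead of plain uniform sampling.
res = dummy_minimize(objective,
                     dimensions=[(-1.0, 1.0)],
                     n_calls=15,
                     initial_point_generator="lhs",
                     random_state=0)
print(res.x, res.fun)
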
@@ -68,15 +68,15 @@ def dummy_minimize(func, dimensions, n_calls=100,
         Set random state to something other than None for reproducible
         results.
 
-    verbose : boolean, default=False
+    verbose : boolean, default: False
         Control the verbosity. It is advised to set the verbosity to True
         for long optimization runs.
 
     callback : callable, list of callables, optional
         If callable then `callback(res)` is called after each call to `func`.
         If list of callables, then each callable in the list is called.
 
-    model_queue_size : int or None, default=None
+    model_queue_size : int or None, default: None
         Keeps list of models only as long as the argument given. In the
         case of None, the list has no capped length.
 
@@ -87,23 +87,20 @@ def dummy_minimize(func, dimensions, n_calls=100,
         Important attributes are:
 
         - `x` [list]: location of the minimum.
-
         - `fun` [float]: function value at the minimum.
-
        - `x_iters` [list of lists]: location of function evaluation for each
-            iteration.
-
+          iteration.
         - `func_vals` [array]: function value for each iteration.
-
         - `space` [Space]: the optimisation space.
-
         - `specs` [dict]: the call specifications.
-
         - `rng` [RandomState instance]: State of the random state
-            at the end of minimization.
+          at the end of minimization.
 
         For more details related to the OptimizeResult object, refer
         http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html
+
+    .. seealso:: functions :class:`skopt.gp_minimize`,
+        :class:`skopt.forest_minimize`, :class:`skopt.gbrt_minimize`
     """
     # all our calls want random suggestions, except if we need to evaluate
     # some initial points

0 commit comments