From 1b8ba34133fd5865d017e15163ac261a3cb0e1d1 Mon Sep 17 00:00:00 2001
From: Magnus
Date: Mon, 8 Jan 2018 10:31:10 +0100
Subject: [PATCH 001/265] Added checking-functions for readability and reusability.

---
 skopt/utils.py | 84 ++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 67 insertions(+), 17 deletions(-)

diff --git a/skopt/utils.py b/skopt/utils.py
index 9c824643d..4ed6b4c26 100644
--- a/skopt/utils.py
+++ b/skopt/utils.py
@@ -501,6 +501,71 @@ def normalize_dimensions(dimensions):
     return Space(transformed_dimensions)
 
 
+def check_list_types(x, types):
+    """
+    Check whether all elements of a list `x` are of the correct type(s)
+    and raise a ValueError if they are not.
+
+    Note that `types` can be either a single object-type or a tuple
+    of object-types.
+
+    Parameters
+    ----------
+    * `x` [list]:
+        List of objects.
+
+    * `types` [type or tuple(type)]:
+        Either a single object-type or a tuple of object-types.
+
+    Exceptions
+    ----------
+    * `ValueError`:
+        If one or more elements in the list `x` are not of the correct type(s).
+
+    Returns
+    -------
+    * Nothing.
+    """
+
+    # List of the elements in the list that are incorrectly typed.
+    err = list(filter(lambda a: not isinstance(a, types), x))
+
+    # If the list is non-empty then raise an exception.
+    if len(err) > 0:
+        msg = "All elements in list must be instances of {}, but found: {}"
+        msg = msg.format(types, err)
+        raise ValueError(msg)
+
+
+def check_dimension_names(dimensions):
+    """
+    Check whether all dimensions have names.
+
+    Parameters
+    ----------
+    * `dimensions` [list(Dimension)]:
+        List of Dimension-objects.
+
+    Exceptions
+    ----------
+    * `ValueError`:
+        If one or more dimensions are unnamed.
+
+    Returns
+    -------
+    * Nothing.
+    """
+
+    # List of the dimensions that have no names.
+    err_dims = list(filter(lambda dim: dim.name is None, dimensions))
+
+    # If the list is non-empty then raise an exception.
+    if len(err_dims) > 0:
+        msg = "All dimensions must have names, but found: {}"
+        msg = msg.format(err_dims)
+        raise ValueError(msg)
+
+
 def use_named_args(dimensions):
     """
     Wrapper / decorator for an objective function that uses named arguments
@@ -579,25 +644,10 @@ def decorator(func):
         """
         # Ensure all dimensions are correctly typed.
-        if not all(isinstance(dim, Dimension) for dim in dimensions):
-            # List of the dimensions that are incorrectly typed.
-            err_dims = list(filter(lambda dim: not isinstance(dim, Dimension),
-                                   dimensions))
-
-            # Error message.
-            msg = "All dimensions must be instances of the Dimension-class, but found: {}"
-            msg = msg.format(err_dims)
-            raise ValueError(msg)
+        check_list_types(dimensions, Dimension)
 
         # Ensure all dimensions have names.
-        if any(dim.name is None for dim in dimensions):
-            # List of the dimensions that have no names.
-            err_dims = list(filter(lambda dim: dim.name is None, dimensions))
-
-            # Error message.
-            msg = "All dimensions must have names, but found: {}"
-            msg = msg.format(err_dims)
-            raise ValueError(msg)
+        check_dimension_names(dimensions)
 
         @wraps(func)
         def wrapper(x):

From 6db2cdd2e958cfa999c1b2c64c46520bdc165b13 Mon Sep 17 00:00:00 2001
From: Magnus
Date: Sun, 17 Dec 2017 15:23:19 +0100
Subject: [PATCH 002/265] Rewrote plots.py. Added two plotting functions. Better support for dimension-naming.
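A minimal usage sketch for the checking-functions added in PATCH 001 above
(assuming `Dimension`, `Real` and `Integer` are importable from `skopt.space`,
and the two helpers from `skopt.utils` once the patch is applied):

    from skopt.space import Dimension, Real, Integer
    from skopt.utils import check_list_types, check_dimension_names

    dims = [Real(1e-4, 1e-1, name='learning_rate'),
            Integer(1, 10, name='max_depth')]

    check_list_types(dims, Dimension)  # OK: every element is a Dimension.
    check_dimension_names(dims)        # OK: every dimension has a name.
    check_list_types(dims, Real)       # Raises ValueError listing the Integer.

Both helpers either pass silently or raise a `ValueError` whose message lists
the offending elements, which is why `use_named_args` can simply call them
unconditionally.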
--- skopt/plots.py | 1045 ++++++++++++++++++++++++++++--------- skopt/space/space.py | 174 +++++- skopt/tests/test_space.py | 10 +- skopt/utils.py | 41 +- 4 files changed, 1008 insertions(+), 262 deletions(-) diff --git a/skopt/plots.py b/skopt/plots.py index 46488a9bf..f411bdf4b 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -9,6 +9,10 @@ from scipy.optimize import OptimizeResult +from skopt.space import Categorical +from skopt.utils import get_samples_dimension +from collections import Counter + def plot_convergence(*args, **kwargs): """Plot one or several convergence traces. @@ -39,6 +43,7 @@ def plot_convergence(*args, **kwargs): * `ax`: [`Axes`]: The matplotlib axes. """ + # <3 legacy python ax = kwargs.get("ax", None) true_minimum = kwargs.get("true_minimum", None) @@ -93,322 +98,880 @@ def plot_convergence(*args, **kwargs): return ax -def _format_scatter_plot_axes(ax, space, ylabel, dim_labels=None): - # Work out min, max of y axis for the diagonal so we can adjust - # them all to the same value - diagonal_ylim = (np.min([ax[i, i].get_ylim()[0] - for i in range(space.n_dims)]), - np.max([ax[i, i].get_ylim()[1] - for i in range(space.n_dims)])) - - if dim_labels is None: - dim_labels = ["$X_{%i}$" % i if d.name is None else d.name - for i, d in enumerate(space.dimensions)] - - # Deal with formatting of the axes - for i in range(space.n_dims): # rows - for j in range(space.n_dims): # columns - ax_ = ax[i, j] - - if j > i: - ax_.axis("off") - - # off-diagonal axis - if i != j: - # plots on the diagonal are special, like Texas. They have - # their own range so do not mess with them. - ax_.set_ylim(*space.dimensions[i].bounds) - ax_.set_xlim(*space.dimensions[j].bounds) - if j > 0: - ax_.set_yticklabels([]) - else: - ax_.set_ylabel(dim_labels[i]) - - # for all rows except ... - if i < space.n_dims - 1: - ax_.set_xticklabels([]) - # ... the bottom row - else: - [l.set_rotation(45) for l in ax_.get_xticklabels()] - ax_.set_xlabel(dim_labels[j]) - - # configure plot for linear vs log-scale - priors = (space.dimensions[j].prior, space.dimensions[i].prior) - scale_setters = (ax_.set_xscale, ax_.set_yscale) - loc_setters = (ax_.xaxis.set_major_locator, - ax_.yaxis.set_major_locator) - for set_major_locator, set_scale, prior in zip( - loc_setters, scale_setters, priors): - if prior == 'log-uniform': - set_scale('log') - else: - set_major_locator(MaxNLocator(6, prune='both')) - - else: - ax_.set_ylim(*diagonal_ylim) - ax_.yaxis.tick_right() - ax_.yaxis.set_label_position('right') - ax_.yaxis.set_ticks_position('both') - ax_.set_ylabel(ylabel) - - ax_.xaxis.tick_top() - ax_.xaxis.set_label_position('top') - ax_.set_xlabel(dim_labels[j]) - - if space.dimensions[i].prior == 'log-uniform': - ax_.set_xscale('log') - else: - ax_.xaxis.set_major_locator(MaxNLocator(6, prune='both')) +def _get_ylim_diagonal(ax): + """Get the min / max of the ylim for all diagonal plots. + This is used in _adjust_fig() so the ylim is the same + for all diagonal plots. - return ax + Parameters + ---------- + * `ax` [`Matplotlib.Axes`]: + 2-dimensional matrix with Matplotlib Axes objects. + + Returns + ------- + * `ylim_diagonal` [list(int)] + The common min and max ylim for the diagonal plots. + """ + + # Number of search-space dimensions used in this plot. + n_dims = len(ax) + + # Get ylim for all diagonal plots. + ylim = [ax[row, row].get_ylim() for row in range(n_dims)] + + # Separate into two lists with low and high ylim. + ylim_lo, ylim_hi = zip(*ylim) + + # Min and max ylim for all diagonal plots. 
+ ylim_min = np.min(ylim_lo) + ylim_max = np.max(ylim_hi) + + # The common ylim for the diagonal plots. + ylim_diagonal = [ylim_min, ylim_max] + + return ylim_diagonal + + +def _adjust_fig(fig, ax, space, ylabel, dimensions): + """ + Process and adjust a 2-dimensional plot-matrix in various ways, + by writing axis-labels, etc. + + This is used by plot_objective() and plot_evaluations(). + + Parameters + ---------- + * `fig` [`Matplotlib.Figure`]: + Figure-object for the plots. + + * `ax` [`Matplotlib.Axes`]: + 2-dimensional matrix with Matplotlib Axes objects. + + * `space` [`Space`]: + Search-space object. + + * `ylabel` [`str`]: + String to be printed on the top-left diagonal plot + e.g. 'Sample Count'. + + * `dimensions` [`list(Dimension)`]: + List of `Dimension` objects used in the plots. + + Returns + ------- + * Nothing. + """ + + # Adjust spacing of the figure. + # This looks bad on some outputs so it has been disabled for now. + # fig.subplots_adjust(left=0.05, right=0.95, bottom=0.05, top=0.95, + # hspace=0.1, wspace=0.1) + + # Get min/max ylim for the diagonal plots, used to normalize their y-axis. + ylim_diagonal = _get_ylim_diagonal(ax=ax) + + # The following for-loops process the sub-plots inside the 2-d matrix. + # This could perhaps also be implemented using other Python tricks, + # but these for-loops are probably much easier to understand. + # Similarly, they have been separated into several for-loops to make + # them easier to understand and modify. + + # Number of search-space dimensions used in this plot. + n_dims = len(dimensions) + # Process the plots on the diagonal. + for row in range(n_dims): + # Get the search-space dimension for this row. + dim = dimensions[row] -def partial_dependence(space, model, i, j=None, sample_points=None, - n_samples=250, n_points=40): - """Calculate the partial dependence for dimensions `i` and `j` with - respect to the objective value, as approximated by `model`. + # Reference to the diagonal plot for this row. + a = ax[row, row] - The partial dependence plot shows how the value of the dimensions - `i` and `j` influence the `model` predictions after "averaging out" - the influence of all other dimensions. + # Write the dimension-name as a label on top of the diagonal plot. + a.xaxis.set_label_position('top') + a.set_xlabel(dim.name) + + # Set the x-axis limits to correspond to the search-space bounds. + a.set_xlim(dim.bounds) + + # Use a common limit for the y-axis on all diagonal plots. + a.set_ylim(ylim_diagonal) + + # Use log-scale on the x-axis? + if dim.prior == 'log-uniform': + a.set_xscale('log') + + # Process the plots below the diagonal. + for row in range(n_dims): + # Get the search-space dimension for this row. + dim_row = dimensions[row] + + # Only iterate until the diagonal. + for col in range(row): + # Get the search-space dimension for this column. + dim_col = dimensions[col] + + # Reference to the plot for this row and column. + a = ax[row, col] + + # Plot a grid. + a.grid(True) + + # Set the plot-limits to correspond to the search-space bounds. + a.set_xlim(dim_col.bounds) + a.set_ylim(dim_row.bounds) + + # Use log-scale on the x-axis? + if dim_col.prior == 'log-uniform': + a.set_xscale('log') + + # Use log-scale on the y-axis? + if dim_row.prior == 'log-uniform': + a.set_yscale('log') + + # Turn off all plots to the upper-right of the diagonal. + for row in range(n_dims): + for col in range(row+1, n_dims): + ax[row, col].axis("off") + + # Set the designated ylabel for the top-left plot. 
+ row = col = 0 + ax[row, col].set_ylabel(ylabel) + + # Set the dimension-names for the left-most column. + col = 0 + for row in range(1, n_dims): + ax[row, col].set_ylabel(dimensions[row].name) + + # Set the dimension-names for the bottom row. + row = n_dims - 1 + for col in range(0, n_dims): + ax[row, col].set_xlabel(dimensions[col].name) + + # Remove the y-tick labels for all plots except the left-most column. + for row in range(n_dims): + for col in range(1, n_dims): + ax[row, col].set_yticklabels([]) + + # Remove the x-tick labels for all plots except the bottom row. + for row in range(n_dims-1): + for col in range(n_dims): + ax[row, col].set_xticklabels([]) + + +def _map_bins(bins, bounds, prior): + """ + For use when plotting histograms. + Maps the number of bins to a log-scale between the bounds, if necessary. Parameters ---------- - * `space` [`Space`] - The parameter space over which the minimization was performed. + * `bins` [int] + Number of bins in the histogram. + + * `bounds` [(int, int)] + Tuple or list with lower- and upper-bounds for a search-space dimension. + + * `prior` [str or None] + If 'log-uniform' then use log-scaling for the bins, + otherwise use the original number of bins. + + Returns + ------- + * `bins_mapped`: [int or np.array(int)]: + Number of bins for a histogram if no mapping, + or a log-scaled array of bin-points if mapping is needed. + """ + + if prior == 'log-uniform': + # Map the number of bins to a log-space for the dimension bounds. + bins_mapped = np.logspace(*np.log10(bounds), bins) + else: + # Use the original number of bins. + bins_mapped = bins + + return bins_mapped + + +def partial_dependence_1D(model, dimension, samples, n_points=40): + """ + Calculate the partial dependence for a single dimension. + + This uses the given model to calculate the average objective value + for all the samples, where the given dimension is fixed at + regular intervals between its bounds. + + This shows how the given dimension affects the objective value + when the influence of all other dimensions are averaged out. + Parameters + ---------- * `model` Surrogate model for the objective function. - * `i` [int] - The first dimension for which to calculate the partial dependence. + * `dimension` [Dimension] + The `Dimension`-object for which to calculate the partial dependence. - * `j` [int, default=None] - The second dimension for which to calculate the partial dependence. - To calculate the 1D partial dependence on `i` alone set `j=None`. - - * `sample_points` [np.array, shape=(n_points, n_dims), default=None] + * `samples` [np.array, shape=(n_points, n_dims)] Randomly sampled and transformed points to use when averaging the model function at each of the `n_points`. - * `n_samples` [int, default=100] - Number of random samples to use for averaging the model function - at each of the `n_points`. Only used when `sample_points=None`. - * `n_points` [int, default=40] - Number of points at which to evaluate the partial dependence - along each dimension `i` and `j`. + Number of points along each dimension where the partial dependence + is evaluated. Returns ------- - For 1D partial dependence: - * `xi`: [np.array]: The points at which the partial dependence was evaluated. * `yi`: [np.array]: - The value of the model at each point `xi`. + The average value of the modelled objective function at each point `xi`. 
+ """ + + def _calc(x): + """ + Helper-function to calculate the average predicted + objective value for the given model, when setting + the index'th dimension of the search-space to the value x, + and then averaging over all samples. + """ + + # Copy the samples so we don't destroy the originals. + samples_copy = np.copy(samples) + + # Set the index'th dimension to x for all samples. + samples_copy[:, index] = x + + # Calculate the predicted objective value for all samples. + y_pred = model.predict(samples_copy) + + # The average predicted value for the objective function. + y_pred_mean = np.mean(y_pred) + + return y_pred_mean + + # Get search-space index for the given dimension. + index = dimension.index + + # Get the bounds of the dimension. + bounds = dimension.bounds + + # Generate evenly spaced points between the bounds. + xi = np.linspace(bounds[0], bounds[1], n_points) + + # Transform the points if necessary. + xi_transformed = dimension.transform(xi) - For 2D partial dependence: + # Calculate the partial dependence for all the points. + yi = [_calc(x) for x in xi_transformed] + + return xi, yi + + +def partial_dependence_2D(model, dimension1, dimension2, samples, n_points=40): + """ + Calculate the partial dependence for two dimensions in the search-space. + + This uses the given model to calculate the average objective value + for all the samples, where the given dimensions are fixed at + regular intervals between their bounds. + + This shows how the given dimensions affect the objective value + when the influence of all other dimensions are averaged out. + + Parameters + ---------- + * `model` + Surrogate model for the objective function. + + * `dimension1` [Dimension] + The first `Dimension`-object for which to calculate the + partial dependence. + + * `dimension2` [Dimension] + The second `Dimension`-object for which to calculate the + partial dependence. + + * `samples` [np.array, shape=(n_points, n_dims)] + Randomly sampled and transformed points to use when averaging + the model function at each of the `n_points`. + * `n_points` [int, default=40] + Number of points along each dimension where the partial dependence + is evaluated. + + Returns + ------- * `xi`: [np.array, shape=n_points]: The points at which the partial dependence was evaluated. + * `yi`: [np.array, shape=n_points]: The points at which the partial dependence was evaluated. + * `zi`: [np.array, shape=(n_points, n_points)]: - The value of the model at each point `(xi, yi)`. + The average value of the objective function at each point `(xi, yi)`. """ - if sample_points is None: - sample_points = space.transform(space.rvs(n_samples=n_samples)) - if j is None: - bounds = space.dimensions[i].bounds - # XXX use linspace(*bounds, n_points) after python2 support ends - xi = np.linspace(bounds[0], bounds[1], n_points) - xi_transformed = space.dimensions[i].transform(xi) + def _calc(x, y): + """ + Helper-function to calculate the average predicted + objective value for the given model, when setting + the index1'th dimension of the search-space to the value x + and setting the index2'th dimension to the value y, + and then averaging over all samples. + """ + + # Copy the samples so we don't destroy the originals. + samples_copy = np.copy(samples) + + # Set the index1'th dimension to x for all samples. + samples_copy[:, index1] = x + + # Set the index2'th dimension to y for all samples. + samples_copy[:, index2] = y + + # Calculate the predicted objective value for all samples. 
+ z_pred = model.predict(samples_copy) + + # The average predicted value for the objective function. + z_pred_mean = np.mean(z_pred) + + return z_pred_mean + + # Get search-space indices for the dimensions. + index1 = dimension1.index + index2 = dimension2.index + + # Get search-space bounds for the dimensions. + bounds1 = dimension1.bounds + bounds2 = dimension2.bounds + + # Generate evenly spaced points between the dimension bounds. + xi = np.linspace(bounds1[0], bounds1[1], n_points) + yi = np.linspace(bounds2[0], bounds2[1], n_points) + + # Transform the points if necessary. + xi_transformed = dimension1.transform(xi) + yi_transformed = dimension2.transform(yi) - yi = [] - for x_ in xi_transformed: - rvs_ = np.array(sample_points) - rvs_[:, i] = x_ - yi.append(np.mean(model.predict(rvs_))) + # Calculate the partial dependence for all combinations of these points. + zi = [[_calc(x, y) for x in xi_transformed] for y in yi_transformed] - return xi, yi + # Convert list-of-list to a numpy array. + zi = np.array(zi) + return xi, yi, zi + + +def plot_evaluations(result, bins=20, dimension_names=None): + """ + Visualize the order in which points were sampled during optimization. + + This creates a 2-d matrix plot where the diagonal plots are histograms + that show the distribution of samples for each search-space dimension. + + The plots below the diagonal are scatter-plots of the samples for + all combinations of search-space dimensions. + + The ordering of the samples are shown as different colour-shades. + + A red star shows the best found parameters. + + NOTE: Search-spaces with `Categorical` dimensions are not supported. + + Parameters + ---------- + * `result` [`OptimizeResult`] + The optimization results from calling e.g. `gp_minimize()`. + + * `bins` [int, bins=20]: + Number of bins to use for histograms on the diagonal. + + * `dimension_names` [list(str)]: + List of names for search-space dimensions to be used in the plot. + You can omit `Categorical` dimensions here as they are not supported. + If `None` then use all dimensions from the search-space. + + Returns + ------- + * `fig`: [`Matplotlib.Figure`]: + The object for the figure. + For example, call `fig.savefig('plot.png')` to save the plot. + + * `ax`: [`Matplotlib.Axes`]: + A 2-d matrix of Axes-objects with the sub-plots. + """ + + # Get the search-space instance from the optimization results. + space = result.space + + # Get the relevant search-space dimensions. + if dimension_names is None: + # Get all dimensions. + dimensions = space.dimensions else: - # XXX use linspace(*bounds, n_points) after python2 support ends - bounds = space.dimensions[j].bounds - xi = np.linspace(bounds[0], bounds[1], n_points) - xi_transformed = space.dimensions[j].transform(xi) + # Only get the named dimensions. + dimensions = space[dimension_names] + + # Ensure there are no categorical dimensions. + # TODO replace with check_list_types(dimensions, (Integer, Real)) in PR #597 + if any(type(dim) == Categorical for dim in dimensions): + raise ValueError("Categorical dimension is not supported.") + + # Number of search-space dimensions we are using. + n_dims = len(dimensions) + + # Create a figure for plotting a 2-d matrix of sub-plots. + fig, ax = plt.subplots(n_dims, n_dims, figsize=(2 * n_dims, 2 * n_dims)) + + # Used to plot colour-shades for the sample-ordering. + # It is just a range from 0 to the number of samples. + sample_order = range(len(result.x_iters)) + + # For all rows in the 2-d plot matrix. 
+ for row in range(n_dims): + # Get the search-space dimension for this row. + dim_row = dimensions[row] + + # Get the index for the search-space dimension. + # This is used to lookup that particular dimension in some functions. + index_row = dim_row.index + + # Get the samples from the optimization-log for this dimension. + samples_row = get_samples_dimension(result=result, index=index_row) - bounds = space.dimensions[i].bounds - yi = np.linspace(bounds[0], bounds[1], n_points) - yi_transformed = space.dimensions[i].transform(yi) + # Get the best-found sample for this dimension. + best_sample_row = result.x[index_row] - zi = [] - for x_ in xi_transformed: - row = [] - for y_ in yi_transformed: - rvs_ = np.array(sample_points) - rvs_[:, (j, i)] = (x_, y_) - row.append(np.mean(model.predict(rvs_))) - zi.append(row) + # Search-space boundary for this dimension. + bounds_row = dim_row.bounds - return xi, yi, np.array(zi).T + # Map the number of bins to a log-space if necessary. + bins_mapped = _map_bins(bins=bins, + bounds=dim_row.bounds, + prior=dim_row.prior) + # Plot a histogram on the diagonal. + ax[row, row].hist(samples_row, bins=bins_mapped, range=bounds_row) -def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, - zscale='linear', dimensions=None): - """Pairwise partial dependence plot of the objective function. + # For all columns until the diagonal in the 2-d plot matrix. + for col in range(row): + # Get the search-space dimension for this column. + dim_col = dimensions[col] - The diagonal shows the partial dependence for dimension `i` with - respect to the objective function. The off-diagonal shows the - partial dependence for dimensions `i` and `j` with - respect to the objective function. The objective function is - approximated by `result.model.` + # Get the index for this search-space dimension. + # This is used to lookup that dimension in some functions. + index_col = dim_col.index - Pairwise scatter plots of the points at which the objective - function was directly evaluated are shown on the off-diagonal. - A red point indicates the found minimum. + # Get the samples from the optimization-log for that dimension. + samples_col = get_samples_dimension(result=result, index=index_col) - Note: search spaces that contain `Categorical` dimensions are - currently not supported by this function. + # Plot all the parameters that were sampled during optimization. + # These are plotted as small coloured dots, where the colour-shade + # indicates the time-progression. + ax[row, col].scatter(samples_col, samples_row, + c=sample_order, s=40, lw=0., cmap='viridis') + + # Get the best-found sample for this dimension. + best_sample_col = result.x[index_col] + + # Plot the best parameters that were sampled during optimization. + # These are plotted as a big red star. + ax[row, col].scatter(best_sample_col, best_sample_row, + c='red', s=100, lw=0., marker='*') + + # Make various adjustments to the plots. + _adjust_fig(fig=fig, ax=ax, space=space, + dimensions=dimensions, ylabel="Sample Count") + + return fig, ax + + +def plot_objective(result, levels=10, n_points=40, n_samples=250, + zscale='linear', dimension_names=None): + """ + Plot a 2-d matrix with so-called Partial Dependence plots + of the objective function. This shows the influence of each + search-space dimension on the objective function. + + This uses the last fitted model for estimating the objective function. 
+
+    The diagonal shows the effect of a single dimension on the
+    objective function, while the plots below the diagonal show
+    the effect on the objective function when varying two dimensions.
+
+    The Partial Dependence is calculated by averaging the objective value
+    for a number of random samples in the search-space,
+    while keeping one or two dimensions fixed at regular intervals. This
+    averages out the effect of varying the other dimensions and shows
+    the influence of that dimension(s) on the objective function.
+
+    Also shown are small black dots for the points that were sampled
+    during optimization, and large red stars show the best found points.
+
+    NOTE: Search-spaces with `Categorical` dimensions are not supported.
+
+    NOTE: This function can be very slow for dimensions greater than 5.
 
     Parameters
     ----------
     * `result` [`OptimizeResult`]
-        The result for which to create the scatter plot matrix.
+        The optimization results from calling e.g. `gp_minimize()`.
 
     * `levels` [int, default=10]
         Number of levels to draw on the contour plot, passed directly
         to `plt.contour()`.
 
     * `n_points` [int, default=40]
-        Number of points at which to evaluate the partial dependence
-        along each dimension.
+        Number of points along each dimension where the partial dependence
+        is evaluated when generating the contour-plots.
 
     * `n_samples` [int, default=250]
-        Number of random samples to use for averaging the model function
-        at each of the `n_points`.
-
-    * `size` [float, default=2]
-        Height (in inches) of each facet.
+        Number of random samples drawn from the search-space, used for
+        averaging the model function at each of the `n_points`.
 
     * `zscale` [str, default='linear']
-        Scale to use for the z axis of the contour plots. Either 'linear'
-        or 'log'.
+        Scale to use for the z-axis of the contour plots.
+        Either 'log' for logarithmic scale; any other value gives a linear scale.
 
-    * `dimensions` [list of str, default=None] Labels of the dimension
-        variables. `None` defaults to `space.dimensions[i].name`, or
-        if also `None` to `['X_0', 'X_1', ..]`.
+    * `dimension_names` [list(str), default=None]:
+        List of names for search-space dimensions to be used in the plot.
+        You can omit `Categorical` dimensions here as they are not supported.
+        If `None` then use all dimensions from the search-space.
 
     Returns
     -------
-    * `ax`: [`Axes`]:
-        The matplotlib axes.
+    * `fig`: [`Matplotlib.Figure`]:
+        The object for the figure.
+        For example, call `fig.savefig('plot.png')` to save the plot.
+
+    * `ax`: [`Matplotlib.Axes`]:
+        A 2-d matrix of Axes-objects with the sub-plots.
     """
+
+    # Scale for the z-axis of the contour-plot. Either Log or Linear (None).
+    locator = LogLocator() if zscale == 'log' else None
+
+    # Get the search-space instance from the optimization results.
     space = result.space
-    samples = np.asarray(result.x_iters)
-    rvs_transformed = space.transform(space.rvs(n_samples=n_samples))
 
-    if zscale == 'log':
-        locator = LogLocator()
-    elif zscale == 'linear':
-        locator = None
+    # Get the relevant search-space dimensions.
+    if dimension_names is None:
+        # Get all dimensions.
+        dimensions = space.dimensions
     else:
-        raise ValueError("Valid values for zscale are 'linear' and 'log',"
-                         " not '%s'."
% zscale) - - fig, ax = plt.subplots(space.n_dims, space.n_dims, - figsize=(size * space.n_dims, size * space.n_dims)) - - fig.subplots_adjust(left=0.05, right=0.95, bottom=0.05, top=0.95, - hspace=0.1, wspace=0.1) - - for i in range(space.n_dims): - for j in range(space.n_dims): - if i == j: - xi, yi = partial_dependence(space, result.models[-1], i, - j=None, - sample_points=rvs_transformed, - n_points=n_points) - - ax[i, i].plot(xi, yi) - ax[i, i].axvline(result.x[i], linestyle="--", color="r", lw=1) - - # lower triangle - elif i > j: - xi, yi, zi = partial_dependence(space, result.models[-1], - i, j, - rvs_transformed, n_points) - ax[i, j].contourf(xi, yi, zi, levels, - locator=locator, cmap='viridis_r') - ax[i, j].scatter(samples[:, j], samples[:, i], - c='k', s=10, lw=0.) - ax[i, j].scatter(result.x[j], result.x[i], - c=['r'], s=20, lw=0.) - - return _format_scatter_plot_axes(ax, space, ylabel="Partial dependence", - dim_labels=dimensions) - - -def plot_evaluations(result, bins=20, dimensions=None): - """Visualize the order in which points where sampled. - - The scatter plot matrix shows at which points in the search - space and in which order samples were evaluated. Pairwise - scatter plots are shown on the off-diagonal for each - dimension of the search space. The order in which samples - were evaluated is encoded in each point's color. - The diagonal shows a histogram of sampled values for each - dimension. A red point indicates the found minimum. - - Note: search spaces that contain `Categorical` dimensions are - currently not supported by this function. + # Only get the named dimensions. + dimensions = space[dimension_names] + + # Ensure there are no categorical dimensions. + # TODO replace with check_list_types(dimensions, (Integer, Real)) in PR #597 + if any(type(dim) == Categorical for dim in dimensions): + raise ValueError("Categorical dimension is not supported.") + + # Number of search-space dimensions we are using. + n_dims = len(dimensions) + + # Get the last fitted model for the search-space. + last_model = result.models[-1] + + # Get new random samples from the search-space and transform if necessary. + new_samples = space.rvs(n_samples=n_samples) + new_samples = space.transform(new_samples) + + # Create a figure for plotting a 2-d matrix of sub-plots. + fig, ax = plt.subplots(n_dims, n_dims, figsize=(2*n_dims, 2*n_dims)) + + # For all rows in the 2-d plot matrix. + for row in range(n_dims): + # Get the search-space dimension for this row. + dim_row = dimensions[row] + + # Get the index for the search-space dimension. + # This is used to lookup that particular dimension in some functions. + index_row = dim_row.index + + # Get the samples from the optimization-log for this dimension. + samples_row = get_samples_dimension(result=result, index=index_row) + + # Get the best-found sample for this dimension. + best_sample_row = result.x[index_row] + + # Search-space boundary for this dimension. + bounds_row = dim_row.bounds + + # Calculate partial dependence for this dimension. + xi, yi = partial_dependence_1D(model=last_model, + dimension=dim_row, + samples=new_samples, + n_points=n_points) + + # Reference to the plot for the diagonal of this row. + a = ax[row, row] + + # TODO: There is a problem here if yi is very large, then matplotlib + # TODO: writes a number above the plot that I don't know how to turn off. + # Plot the partial dependence for this dimension. + a.plot(xi, yi) + + # Plot a dashed line for the best-found parameter. 
+        a.axvline(best_sample_row, linestyle="--", color="red", lw=1)
+
+        # For all columns until the diagonal in the 2-d plot matrix.
+        for col in range(row):
+            # Get the search-space dimension for this column.
+            dim_col = dimensions[col]
+
+            # Get the index for this search-space dimension.
+            # This is used to lookup that dimension in some functions.
+            index_col = dim_col.index
+
+            # Get the samples from the optimization-log for that dimension.
+            samples_col = get_samples_dimension(result=result, index=index_col)
+
+            # Get the best-found sample for this dimension.
+            best_sample_col = result.x[index_col]
+
+            # Calculate the partial dependence for these two dimensions.
+            # Note that column and row are switched here.
+            xi, yi, zi = partial_dependence_2D(model=last_model,
+                                               dimension1=dim_col,
+                                               dimension2=dim_row,
+                                               samples=new_samples,
+                                               n_points=n_points)
+
+            # Reference to the plot for this row and column.
+            a = ax[row, col]
+
+            # Plot the contour landscape for the objective function.
+            a.contourf(xi, yi, zi, levels, locator=locator, cmap='viridis_r')
+
+            # Plot all the parameters that were sampled during optimization.
+            # These are plotted as small black dots.
+            a.scatter(samples_col, samples_row, c='black', s=10, lw=0.)
+
+            # Plot the best parameters that were sampled during optimization.
+            # These are plotted as a big red star.
+            a.scatter(best_sample_col, best_sample_row,
+                      c='red', s=100, lw=0., marker='*')
+
+    # Make various adjustments to the plots.
+    _adjust_fig(fig=fig, ax=ax, space=space,
+                dimensions=dimensions, ylabel="Partial Dependence")
+
+    return fig, ax
+
+
+def plot_objective_2D(result, dimension_name1, dimension_name2,
+                      n_points=40, n_samples=250, levels=10, zscale='linear'):
+    """
+    Create and return a Matplotlib figure and axes with a landscape
+    contour-plot of the last fitted model of the search-space,
+    overlaid with all the samples from the optimization results,
+    for the two given dimensions of the search-space.
+
+    This is similar to `plot_objective()` but only for 2 dimensions.
+
+    NOTE: Categorical dimensions are not supported.
+
+    Parameters
+    ----------
+    * `result` [`OptimizeResult`]
+        The optimization results e.g. from calling `gp_minimize()`.
+
+    * `dimension_name1` [str]:
+        Name of a dimension in the search-space.
+
+    * `dimension_name2` [str]:
+        Name of a dimension in the search-space.
+
+    * `n_samples` [int, default=250]
+        Number of random samples used for estimating the contour-plot
+        of the objective function.
+
+    * `n_points` [int, default=40]
+        Number of points along each dimension where the partial dependence
+        is evaluated when generating the contour-plots.
+
+    * `levels` [int, default=10]
+        Number of levels to draw on the contour plot.
+
+    * `zscale` [str, default='linear']
+        Scale to use for the z-axis of the contour plots.
+        Either 'log' for logarithmic scale; any other value gives a linear scale.
+
+    Returns
+    -------
+    * `fig`: [`Matplotlib.Figure`]:
+        The Matplotlib Figure-object.
+        For example, you can save the plot by calling `fig.savefig('file.png')`
+
+    * `ax`: [`Matplotlib.Axes`]:
+        The Matplotlib Axes-object for the plot.
+    """
+
+    # Get the search-space instance from the optimization results.
+    space = result.space
+
+    # Get the dimension-objects from the search-space using their names.
+    dimension1 = space[dimension_name1]
+    dimension2 = space[dimension_name2]
+
+    # Ensure dimensions are not Categorical.
+ # TODO replace with check_list_types(dimensions, (Integer, Real)) in PR #597 + if any(type(dim) == Categorical for dim in [dimension1, dimension2]): + raise ValueError("Categorical dimension is not supported.") + + # Get the indices for the search-space dimensions. + index1 = dimension1.index + index2 = dimension2.index + + # Get the samples from the optimization-log for the relevant dimensions. + samples1 = get_samples_dimension(result=result, index=index1) + samples2 = get_samples_dimension(result=result, index=index2) + + # Get the best-found samples for the relevant dimensions. + best_sample1 = result.x[index1] + best_sample2 = result.x[index2] + + # Get the last fitted model for the search-space. + last_model = result.models[-1] + + # Get new random samples from the search-space and transform if necessary. + new_samples = space.rvs(n_samples=n_samples) + new_samples = space.transform(new_samples) + + # Estimate the objective function for these sampled points + # using the last fitted model for the search-space. + xi, yi, zi = partial_dependence_2D(model=last_model, + dimension1=dimension1, + dimension2=dimension2, + samples=new_samples, + n_points=n_points) + + # Start a new plot. + fig, ax = plt.subplots(nrows=1, ncols=1) + + # Scale for the z-axis of the contour-plot. Either Log or Linear (None). + locator = LogLocator() if zscale == 'log' else None + + # Plot the contour-landscape for the objective function. + ax.contourf(xi, yi, zi, levels, locator=locator, cmap='viridis_r') + + # Plot all the parameters that were sampled during optimization. + # These are plotted as small black dots. + ax.scatter(samples1, samples2, c='black', s=10, linewidths=1) + + # Plot the best parameters that were sampled during optimization. + # These are plotted as a big red star. + ax.scatter(best_sample1, best_sample2, + c='red', s=50, linewidths=1, marker='*') + + # Use the dimension-names as the labels for the plot-axes. + ax.set_xlabel(dimension_name1) + ax.set_ylabel(dimension_name2) + + # Use log-scale on the x-axis? + if dimension1.prior == 'log-uniform': + ax.set_xscale('log') + + # Use log-scale on the y-axis? + if dimension2.prior == 'log-uniform': + ax.set_yscale('log') + + return fig, ax + + +def plot_histogram(result, dimension_name, bins=20, rotate_labels=0): + """ + Create and return a Matplotlib figure with a histogram + of the samples from the optimization results, + for a given dimension of the search-space. + + Parameters + ---------- + * `result` [`OptimizeResult`] + The optimization results e.g. from calling `gp_minimize()`. + + * `dimension_name` [str]: + Name of a dimension in the search-space. * `bins` [int, bins=20]: - Number of bins to use for histograms on the diagonal. + Number of bins in the histogram. - * `dimensions` [list of str, default=None] Labels of the dimension - variables. `None` defaults to `space.dimensions[i].name`, or - if also `None` to `['X_0', 'X_1', ..]`. + * `rotate_labels` [int, rotate_labels=0]: + Degree to rotate category-names on the x-axis. + Only used for Categorical dimensions. Returns ------- - * `ax`: [`Axes`]: - The matplotlib axes. + * `fig`: [`Matplotlib.Figure`]: + The Matplotlib Figure-object. + For example, you can save the plot by calling `fig.savefig('file.png')` + + * `ax`: [`Matplotlib.Axes`]: + The Matplotlib Axes-object. """ + + # Get the search-space instance from the optimization results. 
space = result.space - samples = np.asarray(result.x_iters) - order = range(samples.shape[0]) - fig, ax = plt.subplots(space.n_dims, space.n_dims, - figsize=(2 * space.n_dims, 2 * space.n_dims)) - - fig.subplots_adjust(left=0.05, right=0.95, bottom=0.05, top=0.95, - hspace=0.1, wspace=0.1) - - for i in range(space.n_dims): - for j in range(space.n_dims): - if i == j: - if space.dimensions[j].prior == 'log-uniform': - low, high = space.bounds[j] - bins_ = np.logspace(np.log10(low), np.log10(high), bins) - else: - bins_ = bins - ax[i, i].hist(samples[:, j], bins=bins_, - range=space.dimensions[j].bounds) - - # lower triangle - elif i > j: - ax[i, j].scatter(samples[:, j], samples[:, i], c=order, - s=40, lw=0., cmap='viridis') - ax[i, j].scatter(result.x[j], result.x[i], - c=['r'], s=20, lw=0.) - - return _format_scatter_plot_axes(ax, space, ylabel="Number of samples", - dim_labels=dimensions) + + # Get the dimension-object. + dimension = space[dimension_name] + + # Get the samples from the optimization-log for that particular dimension. + samples = get_samples_dimension(result=result, index=dimension.index) + + # Start a new plot. + fig, ax = plt.subplots(nrows=1, ncols=1) + + if type(dimension) == Categorical: + # When the search-space dimension is Categorical, it means + # that the possible values are strings. Matplotlib's histogram + # does not support this, so we have to make a bar-plot instead. + + # NOTE: This only shows the categories that are in the samples. + # So if a category was not sampled, it will not be shown here. + + # Count the number of occurrences of the string-categories. + counter = Counter(samples) + + # The counter returns a dict where the keys are the category-names + # and the values are the number of occurrences for each category. + names = list(counter.keys()) + counts = list(counter.values()) + + # Although Matplotlib's docs indicate that the bar() function + # can take a list of strings for the x-axis, it doesn't appear to work. + # So we hack it by creating a list of integers and setting the + # tick-labels with the category-names instead. + x = np.arange(len(counts)) + + # Plot using bars. + ax.bar(x, counts, tick_label=names) + + # Adjust the rotation of the category-names on the x-axis. + ax.set_xticklabels(labels=names, rotation=rotate_labels) + else: + # Otherwise the search-space Dimension is either integer or float, + # in which case the histogram can be plotted more easily. + + # Map the number of bins to a log-space if necessary. + bins_mapped = _map_bins(bins=bins, + bounds=dimension.bounds, + prior=dimension.prior) + + # Plot the histogram. + ax.hist(samples, bins=bins_mapped, range=dimension.bounds) + + # Use log-scale on the x-axis? + if dimension.prior == 'log-uniform': + ax.set_xscale('log') + + # Set the labels. + ax.set_xlabel(dimension_name) + ax.set_ylabel('Sample Count') + + return fig, ax diff --git a/skopt/space/space.py b/skopt/space/space.py index f5c5be6aa..24d41a633 100644 --- a/skopt/space/space.py +++ b/skopt/space/space.py @@ -104,6 +104,23 @@ def check_dimension(dimension, transform=None): class Dimension(object): """Base class for search space dimensions.""" + def __init__(self, name=None): + """ + Common initialization for all types of search-spaces dimensions. + + Call this at the start of __init__ for all sub-classes. + + Parameters + ---------- + * `name` [str or None]: + Name associated with the dimension. + """ + + # Name of this dimension. + self.name = name + + # Initialize index before it has been set by search-space. 
+ self.index = None prior = None @@ -151,7 +168,13 @@ def transformed_bounds(self): @property def name(self): - return self._name + if self._name is None: + # Dimension has no name, so make a default one. + n = 'X_{}'.format(self.index) + else: + # Dimension has a name, so use it. + n = self._name + return n @name.setter def name(self, value): @@ -160,6 +183,20 @@ def name(self, value): else: raise ValueError("Dimension's name must be either string or None.") + @property + def index(self): + """Index of the dimension in the search-space. This follows + the order in which the dimensions were added to the search-space. + It should not be changed after it has first been set. + It is used to give unnamed dimensions a default name, + and it is also used e.g. in plotting functions for + both named and unnamed dimensions.""" + return self._index + + @index.setter + def index(self, value): + self._index = value + def _uniform_inclusive(loc=0.0, scale=1.0): # like scipy.stats.distributions but inclusive of `high` @@ -196,15 +233,16 @@ def __init__(self, low, high, prior="uniform", transform=None, name=None): 0 and 1. * `name` [str or None]: - Name associated with the dimension, e.g., "learning rate". + Name associated with the dimension, e.g., "learning_rate". """ + Dimension.__init__(self, name=name) + if high <= low: raise ValueError("the lower bound {} has to be less than the" " upper bound {}".format(low, high)) self.low = low self.high = high self.prior = prior - self.name = name if transform is None: transform = "identity" @@ -248,8 +286,8 @@ def __eq__(self, other): self.transform_ == other.transform_) def __repr__(self): - return "Real(low={}, high={}, prior='{}', transform='{}')".format( - self.low, self.high, self.prior, self.transform_) + return "Real(low={}, high={}, prior={}, transform={}, name={})".format( + self.low, self.high, self.prior, self.transform_, self.name) def inverse_transform(self, Xt): """Inverse transform samples from the warped space back into the @@ -315,14 +353,15 @@ def __init__(self, low, high, transform=None, name=None): 0 and 1. * `name` [str or None]: - Name associated with dimension, e.g., "number of trees". + Name associated with the dimension, e.g., "n_trees". """ + Dimension.__init__(self, name=name) + if high <= low: raise ValueError("the lower bound {} has to be less than the" " upper bound {}".format(low, high)) self.low = low self.high = high - self.name = name if transform is None: transform = "identity" @@ -345,7 +384,7 @@ def __eq__(self, other): np.allclose([self.high], [other.high])) def __repr__(self): - return "Integer(low={}, high={})".format(self.low, self.high) + return "Integer(low={}, high={}, name={})".format(self.low, self.high, self.name) def inverse_transform(self, Xt): """Inverse transform samples from the warped space back into the @@ -406,10 +445,11 @@ def __init__(self, categories, prior=None, transform=None, name=None): representation of the original space. * `name` [str or None]: - Name associated with dimension, e.g., "colors". + Name associated with the dimension, e.g., "colors". 
""" + Dimension.__init__(self, name=name) + self.categories = tuple(categories) - self.name = name if transform is None: transform = "onehot" @@ -451,7 +491,7 @@ def __repr__(self): else: prior = self.prior - return "Categorical(categories={}, prior={})".format(cats, prior) + return "Categorical(categories={}, prior={}, name={})".format(cats, prior, self.name) def rvs(self, n_samples=None, random_state=None): choices = self._rvs.rvs(size=n_samples, random_state=random_state) @@ -529,6 +569,25 @@ def __init__(self, dimensions): """ self.dimensions = [check_dimension(dim) for dim in dimensions] + # Set the index for all the dimensions. + # This is used e.g. in the plotting-functions so we don't + # have to return the index with __getitem__ below. + # It is important to set this before accessing the dimension-names, + # because the index is used for default names if the dimension + # is unnamed. + for i in range(self.n_dims): + self.dimensions[i].index = i + + # Names of all the dimensions in the search-space. + # This is also a @property further below, but it may be accessed + # many times e.g. in __getitem__ so we compute it only once here. + self._dimension_names = [dim.name for dim in self.dimensions] + + # Ensure all dimension names are unique. + if len(np.unique(self._dimension_names)) != len(self._dimension_names): + raise ValueError("All dimension names must be unique.") + + def __eq__(self, other): return all([a == b for a, b in zip(self.dimensions, other.dimensions)]) @@ -542,6 +601,16 @@ def __repr__(self): def __iter__(self): return iter(self.dimensions) + @property + def dimension_names(self): + """ + Names of all the dimensions in the search-space. + """ + + # NOTE: This may be called many times e.g. by __getitem__ + # so we use a pre-computed list instead of re-computing it every time. + return self._dimension_names + @property def is_real(self): """ @@ -698,6 +767,57 @@ def __contains__(self, point): return False return True + def __getitem__(self, dimension_names): + """ + Lookup and return the search-space dimension with the given name. + + This allows for dict-like lookup of dimensions, for example: + `space['foo']` returns the dimension named 'foo' if it exists, + otherwise a `ValueError` exception is raised. + + It also allows for lookup of a list of dimension-names, for example: + `space[['foo', 'bar']]` returns the two dimensions named + 'foo' and 'bar' if they exist. + + Parameters + ---------- + * `dimension_names` [str or list(str)]: + Name of a single search-space dimension (str). + List of names for search-space dimensions (list(str)). + + Raises + ------ + * `ValueError`: + If there is no search-space dimension with the given name. + + Returns + ------- + * `dims` [Dimension or list(Dimension)]: + A single search-space dimension with the given name, + or a list of search-space dimensions with the given names. + """ + def _get(dimension_name): + """Helper-function for getting a single dimension.""" + + # Get the index of the search-space dimension using its name. + idx = self._dimension_names.index(dimension_name) + + # Get and return the dimension-object. + return self.dimensions[idx] + + if isinstance(dimension_names, str): + # Get a single search-space dimension. + dims = _get(dimension_name=dimension_names) + elif isinstance(dimension_names, list): + # Get a list of search-space dimensions. + # Note that we do not check whether the names are really strings. 
+ dims = [_get(dimension_name=name) for name in dimension_names] + else: + msg = "Dimension name should be either string or list of strings, but got {}." + raise ValueError(msg.format(type(dimension_names))) + + return dims + @property def transformed_bounds(self): """The dimension bounds, in the warped space.""" @@ -732,3 +852,35 @@ def distance(self, point_a, point_b): distance += dim.distance(a, b) return distance + + + def point_to_dict(self, x): + """Convert a point in the search-space from a list + to a dict where the keys are the names of the dimensions. + + NOTE: There is a related function in `utils.point_asdict()` + but it takes the search-space as a dict instead. + + Example + ------- + If `self.dimension_names = ['height', 'width', 'color']` + then `point_to_dict(x=[1, 2.0, 'red'])` returns the dict: + `{'height': 1, 'width': 2.0, 'color': 'red'}` + + Parameters + ---------- + * `x` [list]: + A point in the search-space. + + Returns + ------- + * `x_dict` [dict] + The point `x` in the search-space wrapped in a dict. + The keys are the names of the dimensions, and the + values are from `x`. + """ + + x_dict = {dim_name: value + for dim_name, value in zip(self.dimension_names, x)} + + return x_dict diff --git a/skopt/tests/test_space.py b/skopt/tests/test_space.py index aa677f4d9..d55bfb785 100644 --- a/skopt/tests/test_space.py +++ b/skopt/tests/test_space.py @@ -459,19 +459,13 @@ def test_dimension_bounds(dimension, bounds): @pytest.mark.parametrize("dimension, name", - [(Real(1, 2, name="learning rate"), "learning rate"), - (Integer(1, 100, name="no of trees"), "no of trees"), + [(Real(1, 2, name="learning_rate"), "learning_rate"), + (Integer(1, 100, name="n_trees"), "n_trees"), (Categorical(["red, blue"], name="colors"), "colors")]) def test_dimension_name(dimension, name): assert dimension.name == name -@pytest.mark.parametrize("dimension", - [Real(1, 2), Integer(1, 100), Categorical(["red, blue"])]) -def test_dimension_name_none(dimension): - assert dimension.name is None - - def test_dimension_name(): notnames = [1, 1., True] for n in notnames: diff --git a/skopt/utils.py b/skopt/utils.py index 9c824643d..b1bfc426c 100644 --- a/skopt/utils.py +++ b/skopt/utils.py @@ -487,12 +487,12 @@ def normalize_dimensions(dimensions): elif isinstance(dimension, Real): transformed_dimensions.append( Real(dimension.low, dimension.high, dimension.prior, - transform="normalize") + transform="normalize", name=dimension.name) ) elif isinstance(dimension, Integer): transformed_dimensions.append( Integer(dimension.low, dimension.high, - transform="normalize") + transform="normalize", name=dimension.name) ) else: raise RuntimeError("Unknown dimension type " @@ -501,6 +501,43 @@ def normalize_dimensions(dimensions): return Space(transformed_dimensions) +def get_samples_dimension(result, index): + """Get the samples for the given dimension index + from the optimization-result from e.g. `gp_minimize()`. + + This function is used instead of numpy, because if + we convert `result.x_iters` to a 2-d numpy array, + then all data-types must be identical otherwise numpy + will promote all the types to the most general type. + For example, if you have a Categorical dimension which + is a string, then your Real and Integer dimensions will + be converted to strings as well in the 2-d numpy array. + + Using this function instead of numpy ensures the + original data-type is being preserved. + + See `plots.py` for example usage. 
+ + Parameters + ---------- + * `result` [`OptimizeResult`] + The optimization results e.g. from calling `gp_minimize()`. + + * `index` [int]: + Index for a dimension in the search-space. + + Returns + ------- + * `samples`: [list of either int, float or string]: + The optimization samples for the given dimension. + """ + + # Get the samples from the optimization-log for the relevant dimension. + samples = [x[index] for x in result.x_iters] + + return samples + + def use_named_args(dimensions): """ Wrapper / decorator for an objective function that uses named arguments From dd7433da068b5a2509ef4ea4e5195458393e6555 Mon Sep 17 00:00:00 2001 From: Magnus Date: Fri, 26 Jan 2018 13:00:02 +0100 Subject: [PATCH 003/265] Fixed issues from reviewers. Should be ready to merge now. --- skopt/plots.py | 37 +++++++++++++++++++++++++++++-------- skopt/space/space.py | 6 +++--- skopt/tests/test_space.py | 4 ++-- 3 files changed, 34 insertions(+), 13 deletions(-) diff --git a/skopt/plots.py b/skopt/plots.py index f411bdf4b..02a86bc6d 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -286,7 +286,11 @@ def _map_bins(bins, bounds, prior): if prior == 'log-uniform': # Map the number of bins to a log-space for the dimension bounds. - bins_mapped = np.logspace(*np.log10(bounds), bins) + bounds_log = np.log10(bounds) + bins_mapped = np.logspace(bounds_log[0], bounds_log[1], bins) + + # Note that Python 3.X supports the following, but not Python 2.7 + # bins_mapped = np.logspace(*np.log10(bounds), bins) else: # Use the original number of bins. bins_mapped = bins @@ -517,7 +521,7 @@ def plot_evaluations(result, bins=20, dimension_names=None): # Ensure there are no categorical dimensions. # TODO replace with check_list_types(dimensions, (Integer, Real)) in PR #597 - if any(type(dim) == Categorical for dim in dimensions): + if any(isinstance(dim, Categorical) for dim in dimensions): raise ValueError("Categorical dimension is not supported.") # Number of search-space dimensions we are using. @@ -590,7 +594,7 @@ def plot_evaluations(result, bins=20, dimension_names=None): def plot_objective(result, levels=10, n_points=40, n_samples=250, - zscale='linear', dimension_names=None): + zscale='linear', dimension_names=None): """ Plot a 2-d matrix with so-called Partial Dependence plots of the objective function. This shows the influence of each @@ -606,11 +610,27 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, for a number of random samples in the search-space, while keeping one or two dimensions fixed at regular intervals. This averages out the effect of varying the other dimensions and shows - the influence of that dimension(s) on the objective function. + the influence of one or two dimensions on the objective function. Also shown are small black dots for the points that were sampled during optimization, and large red stars show the best found points. + NOTE: The Partial Dependence plot is only an estimation of the surrogate + model which in turn is only an estimation of the true objective + function that has been optimized. This means the plots show + an "estimate of an estimate" and may therefore be quite imprecise, + especially if few samples have been collected during the optimization + (e.g. less than 100-200 samples), and in regions of the search-space + that have been sparsely sampled (e.g. regions away from the optimum). + This means that the plots may change each time you run the + optimization and they should not be considered completely reliable. 
+    These compromises are necessary because we cannot evaluate the
+    expensive objective function in order to plot it, so we have to use
+    the cheaper surrogate model to plot its contour. And in order to
+    show search-spaces with 3 dimensions or more in a 2-dimensional plot,
+    we further need to map those dimensions to only 2-dimensions using
+    the Partial Dependence, which also causes distortions in the plots.
+
     NOTE: Search-spaces with `Categorical` dimensions are not supported.
 
     NOTE: This function can be very slow for dimensions greater than 5.
 
@@ -667,7 +687,7 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250,
 
     # Ensure there are no categorical dimensions.
     # TODO replace with check_list_types(dimensions, (Integer, Real)) in PR #597
-    if any(type(dim) == Categorical for dim in dimensions):
+    if any(isinstance(dim, Categorical) for dim in dimensions):
         raise ValueError("Categorical dimension is not supported.")
 
     # Number of search-space dimensions we are using.
@@ -771,7 +791,8 @@ def plot_objective_2D(result, dimension_name1, dimension_name2,
     overlaid with all the samples from the optimization results,
     for the two given dimensions of the search-space.
 
-    This is similar to `plot_objective()` but only for 2 dimensions.
+    This is similar to `plot_objective()`, but for only two dimensions;
+    see the doc-string of `plot_objective()` for a more extensive explanation.
 
     NOTE: Categorical dimensions are not supported.
 
@@ -821,7 +842,7 @@ def plot_objective_2D(result, dimension_name1, dimension_name2,
     # Ensure dimensions are not Categorical.
     # TODO replace with check_list_types(dimensions, (Integer, Real)) in PR #597
-    if any(type(dim) == Categorical for dim in [dimension1, dimension2]):
+    if any(isinstance(dim, Categorical) for dim in [dimension1, dimension2]):
         raise ValueError("Categorical dimension is not supported.")
 
     # Get the indices for the search-space dimensions.
@@ -927,7 +948,7 @@ def plot_histogram(result, dimension_name, bins=20, rotate_labels=0):
     # Start a new plot.
     fig, ax = plt.subplots(nrows=1, ncols=1)
 
-    if type(dimension) == Categorical:
+    if isinstance(dimension, Categorical):
         # When the search-space dimension is Categorical, it means
         # that the possible values are strings. Matplotlib's histogram
         # does not support this, so we have to make a bar-plot instead.
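The `isinstance` fixes above are not just stylistic: an exact
`type(...) == Categorical` comparison silently misses subclasses, so a
user-defined subclass would slip past the guard. A small sketch of the
difference (the `MyCategorical` subclass is hypothetical):

    from skopt.space import Categorical

    class MyCategorical(Categorical):
        pass  # Hypothetical user-defined subclass.

    dim = MyCategorical(['red', 'green'])
    print(type(dim) == Categorical)      # False: exact-type check misses the subclass.
    print(isinstance(dim, Categorical))  # True: the subclass is caught and rejected.

Since these checks exist to reject unsupported dimensions, `isinstance`
is the safer guard.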
diff --git a/skopt/space/space.py b/skopt/space/space.py index 24d41a633..4b7e937b7 100644 --- a/skopt/space/space.py +++ b/skopt/space/space.py @@ -286,7 +286,7 @@ def __eq__(self, other): self.transform_ == other.transform_) def __repr__(self): - return "Real(low={}, high={}, prior={}, transform={}, name={})".format( + return "Real(low={}, high={}, prior='{}', transform='{}', name='{}')".format( self.low, self.high, self.prior, self.transform_, self.name) def inverse_transform(self, Xt): @@ -384,7 +384,7 @@ def __eq__(self, other): np.allclose([self.high], [other.high])) def __repr__(self): - return "Integer(low={}, high={}, name={})".format(self.low, self.high, self.name) + return "Integer(low={}, high={}, name='{}')".format(self.low, self.high, self.name) def inverse_transform(self, Xt): """Inverse transform samples from the warped space back into the @@ -491,7 +491,7 @@ def __repr__(self): else: prior = self.prior - return "Categorical(categories={}, prior={}, name={})".format(cats, prior, self.name) + return "Categorical(categories={}, prior={}, name='{}')".format(cats, prior, self.name) def rvs(self, n_samples=None, random_state=None): choices = self._rvs.rvs(size=n_samples, random_state=random_state) diff --git a/skopt/tests/test_space.py b/skopt/tests/test_space.py index d55bfb785..6b00c6914 100644 --- a/skopt/tests/test_space.py +++ b/skopt/tests/test_space.py @@ -170,11 +170,11 @@ def test_categorical_transform_binary(): def test_categorical_repr(): small_cat = Categorical([1, 2, 3, 4, 5]) assert (small_cat.__repr__() == - "Categorical(categories=(1, 2, 3, 4, 5), prior=None)") + "Categorical(categories=(1, 2, 3, 4, 5), prior=None, name='X_None')") big_cat = Categorical([1, 2, 3, 4, 5, 6, 7, 8]) assert (big_cat.__repr__() == - 'Categorical(categories=(1, 2, 3, ..., 6, 7, 8), prior=None)') + "Categorical(categories=(1, 2, 3, ..., 6, 7, 8), prior=None, name='X_None')") @pytest.mark.fast_test From ab9ca5d83c2d21bd12b6bc755f3a4f1585fc9a4f Mon Sep 17 00:00:00 2001 From: Caldas Date: Tue, 12 Jun 2018 16:18:11 +0100 Subject: [PATCH 004/265] Return optimizer results in BayesSearchCV --- skopt/searchcv.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/skopt/searchcv.py b/skopt/searchcv.py index 26aefbc94..6fc1eebd8 100644 --- a/skopt/searchcv.py +++ b/skopt/searchcv.py @@ -638,6 +638,7 @@ def fit(self, X, y=None, groups=None, callback=None): self.cv_results_ = defaultdict(list) self.best_index_ = None self.multimetric_ = False + self.optimizer_results_ = {} n_points = self.n_points @@ -662,6 +663,8 @@ def fit(self, X, y=None, groups=None, callback=None): if eval_callbacks(callbacks, optim_result): break + + self.optimizer_results_[optimizer] = optim_result # Refit the best model on the the whole dataset if self.refit: From 768ba69646331184025ce395de380cf6fa4d6c08 Mon Sep 17 00:00:00 2001 From: Caldas Date: Tue, 12 Jun 2018 16:30:46 +0100 Subject: [PATCH 005/265] remove whitespace --- skopt/searchcv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skopt/searchcv.py b/skopt/searchcv.py index 6fc1eebd8..37a1b677c 100644 --- a/skopt/searchcv.py +++ b/skopt/searchcv.py @@ -663,7 +663,7 @@ def fit(self, X, y=None, groups=None, callback=None): if eval_callbacks(callbacks, optim_result): break - + self.optimizer_results_[optimizer] = optim_result # Refit the best model on the the whole dataset From 334638981410e47d1f2b5cc3c529dc82db4b60fc Mon Sep 17 00:00:00 2001 From: holgern Date: Sun, 2 Feb 2020 15:01:21 +0100 Subject: [PATCH 006/265] Fix license formatting and title, 
so that github recognize it --- LICENSE.md => LICENSE | 21 ++++++++++----------- setup.py | 2 +- 2 files changed, 11 insertions(+), 12 deletions(-) rename LICENSE.md => LICENSE (62%) diff --git a/LICENSE.md b/LICENSE similarity index 62% rename from LICENSE.md rename to LICENSE index cf5772813..412f53992 100644 --- a/LICENSE.md +++ b/LICENSE @@ -1,23 +1,22 @@ -New BSD License +BSD 3-Clause License Copyright (c) 2016-2020 The scikit-optimize developers. - All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - a. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. - b. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. - c. Neither the name of the scikit-optimize developers nor the names of - its contributors may be used to endorse or promote products - derived from this software without specific prior written - permission. +3. Neither the name of the scikit-optimize developers nor the names of + its contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE diff --git a/setup.py b/setup.py index ac0675dea..b66a93600 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ description='Sequential model-based optimization toolbox.', long_description=open('README.rst').read(), url='https://scikit-optimize.github.io/', - license='BSD', + license='BSD 3-clause "New" or "Revised License"', author='The scikit-optimize contributors', packages=['skopt', 'skopt.learning', 'skopt.optimizer', 'skopt.space', 'skopt.learning.gaussian_process'], From acad78849635548827b3d05aa82e9ad7b9377eda Mon Sep 17 00:00:00 2001 From: holgern Date: Sun, 2 Feb 2020 15:23:14 +0100 Subject: [PATCH 007/265] Fix license, so that github can detect it --- LICENSE | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/LICENSE b/LICENSE index 412f53992..0dbeb1883 100644 --- a/LICENSE +++ b/LICENSE @@ -6,26 +6,24 @@ All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: -1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -3. Neither the name of the scikit-optimize developers nor the names of - its contributors may be used to endorse or promote products - derived from this software without specific prior written - permission. +1. 
Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH -DAMAGE. +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file From 735b5a7147607983f10985dedaa0c1c8c9c98c1f Mon Sep 17 00:00:00 2001 From: holgern Date: Sun, 2 Feb 2020 15:36:42 +0100 Subject: [PATCH 008/265] Fix zenodo json --- .zenodo.json | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/.zenodo.json b/.zenodo.json index 7c95c0c3e..03bb01a54 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -124,6 +124,13 @@ "hyperparameter", "bayesian-optimization" ], - "license": "bsd-license", + "license": "BSD-3-Clause", + "related_identifiers": [ + { + "identifier": "https://scikit-optimize.github.io", + "relation": "documents", + "scheme": "url" + } + ], "upload_type": "software" } \ No newline at end of file From b23d769a3d115b52d80bd22b62df2727f9d51b98 Mon Sep 17 00:00:00 2001 From: holgern Date: Mon, 3 Feb 2020 11:19:43 +0100 Subject: [PATCH 009/265] Add unit test and missing min_impurity_decrease parameter --- skopt/learning/forest.py | 8 ++- skopt/learning/tests/test_forest.py | 100 ++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 2 deletions(-) create mode 100644 skopt/learning/tests/test_forest.py diff --git a/skopt/learning/forest.py b/skopt/learning/forest.py index 62746575e..67cee9acf 100644 --- a/skopt/learning/forest.py +++ b/skopt/learning/forest.py @@ -187,7 +187,8 @@ class RandomForestRegressor(_sk_RandomForestRegressor): def __init__(self, n_estimators=10, criterion='mse', max_depth=None, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features='auto', - max_leaf_nodes=None, bootstrap=True, oob_score=False, + max_leaf_nodes=None, min_impurity_decrease=0., + bootstrap=True, oob_score=False, n_jobs=1, random_state=None, verbose=0, warm_start=False, min_variance=0.0): self.min_variance = min_variance @@ -198,6 +199,7 @@ def 
__init__(self, n_estimators=10, criterion='mse', max_depth=None, min_samples_leaf=min_samples_leaf, min_weight_fraction_leaf=min_weight_fraction_leaf, max_features=max_features, max_leaf_nodes=max_leaf_nodes, + min_impurity_decrease=min_impurity_decrease, bootstrap=bootstrap, oob_score=oob_score, n_jobs=n_jobs, random_state=random_state, verbose=verbose, warm_start=warm_start) @@ -371,7 +373,8 @@ class ExtraTreesRegressor(_sk_ExtraTreesRegressor): def __init__(self, n_estimators=10, criterion='mse', max_depth=None, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features='auto', - max_leaf_nodes=None, bootstrap=False, oob_score=False, + max_leaf_nodes=None, min_impurity_decrease=0., + bootstrap=False, oob_score=False, n_jobs=1, random_state=None, verbose=0, warm_start=False, min_variance=0.0): self.min_variance = min_variance @@ -382,6 +385,7 @@ def __init__(self, n_estimators=10, criterion='mse', max_depth=None, min_samples_leaf=min_samples_leaf, min_weight_fraction_leaf=min_weight_fraction_leaf, max_features=max_features, max_leaf_nodes=max_leaf_nodes, + min_impurity_decrease=min_impurity_decrease, bootstrap=bootstrap, oob_score=oob_score, n_jobs=n_jobs, random_state=random_state, verbose=verbose, warm_start=warm_start) diff --git a/skopt/learning/tests/test_forest.py b/skopt/learning/tests/test_forest.py new file mode 100644 index 000000000..27d03b01b --- /dev/null +++ b/skopt/learning/tests/test_forest.py @@ -0,0 +1,100 @@ +import numpy as np +import pytest + +from scipy import stats + +from numpy.testing import assert_equal +from numpy.testing import assert_array_equal +from numpy.testing import assert_almost_equal + +from skopt.learning import ExtraTreesRegressor, RandomForestRegressor + + +def truth(X): + return 0.5 * np.sin(1.75*X[:, 0]) + + +@pytest.mark.fast_test +def test_random_forest(): + # toy sample + X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]] + y = [-1, -1, -1, 1, 1, 1] + T = [[-1, -1], [2, 2], [3, 2]] + true_result = [-1, 1, 1] + + clf = RandomForestRegressor(n_estimators=10, random_state=1) + clf.fit(X, y) + + assert_array_equal(clf.predict(T), true_result) + assert 10 == len(clf) + + clf = RandomForestRegressor(n_estimators=10, min_impurity_decrease=0.1, + random_state=1) + clf.fit(X, y) + + assert_array_equal(clf.predict(T), true_result) + assert 10 == len(clf) + + clf = RandomForestRegressor(n_estimators=10, criterion="mse", + max_depth=None, min_samples_split=2, + min_samples_leaf=1, min_weight_fraction_leaf=0., + max_features="auto", max_leaf_nodes=None, + min_impurity_decrease=0., bootstrap=True, + oob_score=False, + n_jobs=None, random_state=1, + verbose=0, warm_start=False) + clf.fit(X, y) + assert_array_equal(clf.predict(T), true_result) + assert 10 == len(clf) + + clf = RandomForestRegressor(n_estimators=10, max_features=1, random_state=1) + clf.fit(X, y) + assert_array_equal(clf.predict(T), true_result) + assert 10 == len(clf) + + # also test apply + leaf_indices = clf.apply(X) + assert leaf_indices.shape == (len(X), clf.n_estimators) + + +@pytest.mark.fast_test +def test_extra_forest(): + # toy sample + X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]] + y = [-1, -1, -1, 1, 1, 1] + T = [[-1, -1], [2, 2], [3, 2]] + true_result = [-1, 1, 1] + + clf = ExtraTreesRegressor(n_estimators=10, random_state=1) + clf.fit(X, y) + + assert_array_equal(clf.predict(T), true_result) + assert 10 == len(clf) + + clf = ExtraTreesRegressor(n_estimators=10, min_impurity_decrease=0.1, + random_state=1) + clf.fit(X, y) + + 
assert_array_equal(clf.predict(T), true_result) + assert 10 == len(clf) + + clf = ExtraTreesRegressor(n_estimators=10, criterion="mse", + max_depth=None, min_samples_split=2, + min_samples_leaf=1, min_weight_fraction_leaf=0., + max_features="auto", max_leaf_nodes=None, + min_impurity_decrease=0., bootstrap=False, + oob_score=False, + n_jobs=None, random_state=1, + verbose=0, warm_start=False) + clf.fit(X, y) + assert_array_equal(clf.predict(T), true_result) + assert 10 == len(clf) + + clf = ExtraTreesRegressor(n_estimators=10, max_features=1, random_state=1) + clf.fit(X, y) + assert_array_equal(clf.predict(T), true_result) + assert 10 == len(clf) + + # also test apply + leaf_indices = clf.apply(X) + assert leaf_indices.shape == (len(X), clf.n_estimators) From 793a7bb588cc51ee02787a4568fc06173972f5a3 Mon Sep 17 00:00:00 2001 From: holgern Date: Mon, 3 Feb 2020 11:20:45 +0100 Subject: [PATCH 010/265] Fix pep8 --- skopt/learning/tests/test_forest.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/skopt/learning/tests/test_forest.py b/skopt/learning/tests/test_forest.py index 27d03b01b..eecf6d217 100644 --- a/skopt/learning/tests/test_forest.py +++ b/skopt/learning/tests/test_forest.py @@ -37,7 +37,8 @@ def test_random_forest(): clf = RandomForestRegressor(n_estimators=10, criterion="mse", max_depth=None, min_samples_split=2, - min_samples_leaf=1, min_weight_fraction_leaf=0., + min_samples_leaf=1, + min_weight_fraction_leaf=0., max_features="auto", max_leaf_nodes=None, min_impurity_decrease=0., bootstrap=True, oob_score=False, @@ -47,7 +48,8 @@ def test_random_forest(): assert_array_equal(clf.predict(T), true_result) assert 10 == len(clf) - clf = RandomForestRegressor(n_estimators=10, max_features=1, random_state=1) + clf = RandomForestRegressor(n_estimators=10, max_features=1, + random_state=1) clf.fit(X, y) assert_array_equal(clf.predict(T), true_result) assert 10 == len(clf) From 3ec59b50484184f57a080f8ab5402f1ae1091b00 Mon Sep 17 00:00:00 2001 From: holgern Date: Mon, 3 Feb 2020 11:56:38 +0100 Subject: [PATCH 011/265] Fix n_jobs parameter for sklearn 0.19 --- skopt/learning/tests/test_forest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skopt/learning/tests/test_forest.py b/skopt/learning/tests/test_forest.py index eecf6d217..0711cde9d 100644 --- a/skopt/learning/tests/test_forest.py +++ b/skopt/learning/tests/test_forest.py @@ -42,7 +42,7 @@ def test_random_forest(): max_features="auto", max_leaf_nodes=None, min_impurity_decrease=0., bootstrap=True, oob_score=False, - n_jobs=None, random_state=1, + n_jobs=1, random_state=1, verbose=0, warm_start=False) clf.fit(X, y) assert_array_equal(clf.predict(T), true_result) @@ -86,7 +86,7 @@ def test_extra_forest(): max_features="auto", max_leaf_nodes=None, min_impurity_decrease=0., bootstrap=False, oob_score=False, - n_jobs=None, random_state=1, + n_jobs=1, random_state=1, verbose=0, warm_start=False) clf.fit(X, y) assert_array_equal(clf.predict(T), true_result) From 3c16258733b35311437d25788aafcd7c165d3983 Mon Sep 17 00:00:00 2001 From: holgern Date: Mon, 3 Feb 2020 17:54:49 +0100 Subject: [PATCH 012/265] Add partial dependency plots * Code was taken from https://github.com/bytesandbrains/ProcessOptimizer * Thanks to @sigurdcarlsen --- examples/partial-dependence-plot.py | 92 +++++++++++++++ skopt/plots.py | 173 +++++++++++++++++++++++----- skopt/space/space.py | 5 + 3 files changed, 239 insertions(+), 31 deletions(-) create mode 100644 examples/partial-dependence-plot.py diff --git 
a/examples/partial-dependence-plot.py b/examples/partial-dependence-plot.py
new file mode 100644
index 000000000..0c405f24a
--- /dev/null
+++ b/examples/partial-dependence-plot.py
@@ -0,0 +1,92 @@
+"""
+========================
+Partial Dependence Plots
+========================
+
+Sigurd Carlsen Feb 2019
+Reformatted by Holger Nahrstaedt 2020
+
+.. currentmodule:: skopt
+
+This notebook serves to showcase the new features that are being added to
+the scikit-optimize toolbox.
+"""
+print(__doc__)
+import sys
+from skopt.plots import plot_objective
+from skopt import forest_minimize
+import numpy as np
+np.random.seed(123)
+import matplotlib.pyplot as plt
+
+
+#############################################################################
+# plot_objective
+# ==============
+# Plot objective now supports optional use of partial dependence as well as
+# different methods of defining parameter values for dependence plots
+
+# Here we define a function that we evaluate.
+def funny_func(x):
+    s = 0
+    for i in range(len(x)):
+        s += (x[i] * i) ** 2
+    return s
+
+
+#############################################################################
+
+# We run forest_minimize on the function
+bounds = [(-1, 1.), ] * 4
+n_calls = 150
+
+result = forest_minimize(funny_func, bounds, n_calls=n_calls, base_estimator="ET",
+                         random_state=4)
+
+# Here we see an example of using partial dependence. Even when setting
+# n_points all the way down to 10 from the default of 40, this method is
+# still very slow. This is because partial dependence calculates 250 extra
+# predictions for each point on the plots.
+
+
+_ = plot_objective(result, usepartialdependence=True, n_points=10)
+
+# Here we plot without partial dependence. We see that it is a lot faster.
+# Also the values for the other parameters are set to the default "result"
+# which is the parameter set of the best observed value so far. In the case
+# of funny_func this is close to 0 for all parameters.
+
+_ = plot_objective(result, usepartialdependence=False, n_points=10)
+
+# Here we try with setting the other parameters to something other than
+# "result". First we try with "expected_minimum" which is the set of
+# parameters that gives the minimum value of the surrogate function,
+# using scipy's minimum search method.
+
+_ = plot_objective(result, usepartialdependence=False, n_points=10,
+                   pars='expected_minimum')
+
+# "expected_minimum_random" is a naive way of finding the minimum of the
+# surrogate by only using random sampling:
+
+_ = plot_objective(result, usepartialdependence=False, n_points=10,
+                   pars='expected_minimum_random')
+
+# Lastly we can also define these parameters ourselves by passing a list
+# as the pars argument:
+
+_ = plot_objective(result, usepartialdependence=False,
+                   n_points=10, pars=[1, -0.5, 0.5, 0])
+
+# We can also specify how many initial samples are used for the two different
+# "expected_minimum" methods. We set it to a low value in the next examples
+# to showcase how it affects the minimum for the two methods.
+ +_ = plot_objective(result, usepartialdependence=False, n_points=10, + pars='expected_minimum_random', + expected_minimum_samples=10) + +############################################################################# + +_ = plot_objective(result, usepartialdependence=False, n_points=10, + pars='expected_minimum', expected_minimum_samples=1) diff --git a/skopt/plots.py b/skopt/plots.py index 45a2e2b4d..6ce1df092 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -5,11 +5,13 @@ from functools import partial from scipy.optimize import OptimizeResult +from skopt import expected_minimum from .space import Categorical # For plot tests, matplotlib must be set to headless mode early if 'pytest' in sys.modules: import matplotlib + matplotlib.use('Agg') import matplotlib.pyplot as plt @@ -214,7 +216,7 @@ def _format_scatter_plot_axes(ax, space, ylabel, dim_labels=None): if j > i: ax_.axis("off") - elif i > j: # off-diagonal plots + elif i > j: # off-diagonal plots # plots on the diagonal are special, like Texas. They have # their own range so do not mess with them. if not iscat[i]: # bounds not meaningful for categoricals @@ -222,12 +224,12 @@ def _format_scatter_plot_axes(ax, space, ylabel, dim_labels=None): if iscat[j]: # partial() avoids creating closures in a loop ax_.xaxis.set_major_formatter(FuncFormatter( - partial(_cat_format, space.dimensions[j]))) + partial(_cat_format, space.dimensions[j]))) else: ax_.set_xlim(*space.dimensions[j].bounds) - if j == 0: # only leftmost column (0) gets y labels + if j == 0: # only leftmost column (0) gets y labels ax_.set_ylabel(dim_labels[i]) - if iscat[i]: # Set category labels for left column + if iscat[i]: # Set category labels for left column ax_.yaxis.set_major_formatter(FuncFormatter( partial(_cat_format, space.dimensions[i]))) else: @@ -254,7 +256,7 @@ def _format_scatter_plot_axes(ax, space, ylabel, dim_labels=None): ax_.yaxis.set_major_locator(MaxNLocator(6, prune='both', integer=iscat[i])) - else: # diagonal plots + else: # diagonal plots ax_.set_ylim(*diagonal_ylim) ax_.yaxis.tick_right() ax_.yaxis.set_label_position('right') @@ -278,13 +280,13 @@ def _format_scatter_plot_axes(ax, space, ylabel, dim_labels=None): def partial_dependence(space, model, i, j=None, sample_points=None, - n_samples=250, n_points=40): - """Calculate the partial dependence for dimensions `i` and `j` with + n_samples=250, n_points=40, x_eval=None): + """Calculate the (partial-) dependence for dimensions `i` and `j` with respect to the objective value, as approximated by `model`. + If x_eval is set to "None", the partial dependence will be calculated. - The partial dependence plot shows how the value of the dimensions - `i` and `j` influence the `model` predictions after "averaging out" - the influence of all other dimensions. + The dependence plot shows how the value of the dimensions + `i` and `j` influence the `model`. Parameters ---------- @@ -302,17 +304,26 @@ def partial_dependence(space, model, i, j=None, sample_points=None, To calculate the 1D partial dependence on `i` alone set `j=None`. sample_points : np.array, shape=(n_points, n_dims), default=None + Only used when `x_eval=None`, i.e in case partial dependence should + be calculated. Randomly sampled and transformed points to use when averaging - the model function at each of the `n_points`. + the model function at each of the `n_points` when using partial + dependence. n_samples : int, default=100 Number of random samples to use for averaging the model function - at each of the `n_points`. 
Only used when `sample_points=None`. + at each of the `n_points` when using partial dependence. Only used + when `sample_points=None` and `x_eval=None`. n_points : int, default=40 Number of points at which to evaluate the partial dependence along each dimension `i` and `j`. + x_eval : list, default=None + x_eval is a list of parameter values or None. In case `x_eval` + is not None, the parsed dependence will be calculated using these values. + Otherwise, the partial dependence is calculated. + Returns ------- For 1D partial dependence: @@ -335,25 +346,34 @@ def partial_dependence(space, model, i, j=None, sample_points=None, For Categorical variables, the `xi` (and `yi` for 2D) returned are the indices of the variable in `Dimension.categories`. """ - # The idea is to step through one dimension, evaluating the model with - # that dimension fixed and averaging over random values in all other - # dimensions. (Or step through 2 dimensions when i and j are given.) + # The idea is to step through one dimension and evaluating the model with + # that dimension fixed. (Or step through 2 dimensions when i and j are given.) # Categorical dimensions make this interesting, because they are one- # hot-encoded, so there is a one-to-many mapping of input dimensions # to transformed (model) dimensions. - if sample_points is None: + # If we havent parsed an x_eval list we use random sampled values instead + if x_eval is None: sample_points = space.transform(space.rvs(n_samples=n_samples)) + else: + sample_points = space.transform([x_eval]) - # dim_locs[i] is the (column index of the) start of dim i in sample_points + # dim_locs[i] is the (column index of the) start of dim i in sample_points. + # This is usefull when we are using one hot encoding, i.e using categorical values dim_locs = np.cumsum([0] + [d.transformed_size for d in space.dimensions]) if j is None: + # We sample evenly instead of randomly. This is necessary when using + # categorical values xi, xi_transformed = _evenly_sample(space.dimensions[i], n_points) yi = [] for x_ in xi_transformed: - rvs_ = np.array(sample_points) # copy + rvs_ = np.array(sample_points) # copy + # We replace the values in the dimension that we want to keep fixed rvs_[:, dim_locs[i]:dim_locs[i + 1]] = x_ + # In case of `x_eval=None` rvs conists of random samples. + # Calculating the mean of these samples is how partial dependence + # is implemented. yi.append(np.mean(model.predict(rvs_))) return xi, yi @@ -366,7 +386,7 @@ def partial_dependence(space, model, i, j=None, sample_points=None, for x_ in xi_transformed: row = [] for y_ in yi_transformed: - rvs_ = np.array(sample_points) # copy + rvs_ = np.array(sample_points) # copy rvs_[:, dim_locs[j]:dim_locs[j + 1]] = x_ rvs_[:, dim_locs[i]:dim_locs[i + 1]] = y_ row.append(np.mean(model.predict(rvs_))) @@ -376,18 +396,20 @@ def partial_dependence(space, model, i, j=None, sample_points=None, def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, - zscale='linear', dimensions=None): - """Pairwise partial dependence plot of the objective function. + zscale='linear', dimensions=None, usepartialdependence=True, pars='result', + expected_minimum_samples=None): + """Pairwise dependence plot of the objective function. - The diagonal shows the partial dependence for dimension `i` with + The diagonal shows the dependence for dimension `i` with respect to the objective function. 
The off-diagonal shows the - partial dependence for dimensions `i` and `j` with + dependence for dimensions `i` and `j` with respect to the objective function. The objective function is approximated by `result.model.` Pairwise scatter plots of the points at which the objective function was directly evaluated are shown on the off-diagonal. - A red point indicates the found minimum. + A red point indicates per default the best observed minimum, but + this can be changed by changing argument ´pars´. Parameters ---------- @@ -418,14 +440,84 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, variables. `None` defaults to `space.dimensions[i].name`, or if also `None` to `['X_0', 'X_1', ..]`. + usepartialdependence : bool, default=false + Wether to use partial + dependence or not when calculating dependence. If false plot_objective + will parse values to the dependence function, defined by the pars argument + + pars : str, default = 'result' or list of floats + Defines the values for the red + points in the plots, and if partialdependence is false, this argument also + defines values for all other parameters when calculating dependence. + Valid strings: 'result' - Use best observed parameters + 'expected_minimum' - Parameters that gives the best minimum + Calculated using scipy's minimize method. This method + currently does not work with categorical values. + 'expected_minimum_random' - Parameters that gives the best minimum + when using naive random sampling. Works with categorical values + expected_minimum_samples : float, default = None + Determines how many points should be evaluated + to find the minimum when using 'expected_minimum' or 'expected_minimum_random' + Returns ------- ax : `Axes` The matplotlib axes. """ + # Here we define the values for which to plot the red dot (2d plot) and the red dotted line (1d plot). + # These same values will be used for evaluating the plots when calculating dependence. (Unless partial + # dependence is to be used instead). space = result.space + if isinstance(pars, str): + if pars == 'result': + # Using the best observed result + x_vals = result.x + elif pars == 'expected_minimum': + if result.space.is_partly_categorical: + # space is also categorical + raise ValueError('expected_minimum does not support any' + 'categorical values') + # Do a gradient based minimum search using scipys own minimizer + if expected_minimum_samples: + # If a value for + # expected_minimum_samples has been parsed + x_vals, _ = expected_minimum(result, + n_random_starts=expected_minimum_samples, + random_state=None) + else: # Use standard of 20 random starting points + x_vals, _ = expected_minimum(result, + n_random_starts=20, + random_state=None) + elif pars == 'expected_minimum_random': + # Do a minimum search by evaluating the function with + # n_samples sample values + if expected_minimum_samples: + # If a value for + # expected_minimum_samples has been parsed + x_vals = expected_min_random_sampling(result.models[-1], space, + n_samples=expected_minimum_samples) + else: + # Use standard of 10^n_parameters. 
Note this + # becomes very slow for many parameters + x_vals = expected_min_random_sampling(result.models[-1], space, + n_samples=10 ** len(result.x)) + else: + raise ValueError('Argument ´pars´ must be a valid' + 'string (´result´)') + elif isinstance(pars, list): + assert len(pars) == len(result.x), 'Argument ´pars´ of type list must' \ + 'have same length as number of features' + # Using defined x_values + x_vals = pars + else: + raise ValueError('Argument ´pars´ must be a string or a list') + + if usepartialdependence: + x_eval = None + else: + x_eval = x_vals rvs_transformed = space.transform(space.rvs(n_samples=n_samples)) - samples, minimum, _ = _map_categories(space, result.x_iters, result.x) + samples, minimum, _ = _map_categories(space, result.x_iters, x_vals) if zscale == 'log': locator = LogLocator() @@ -447,7 +539,7 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, xi, yi = partial_dependence(space, result.models[-1], i, j=None, sample_points=rvs_transformed, - n_points=n_points) + n_points=n_points, x_eval=x_eval) ax[i, i].plot(xi, yi) ax[i, i].axvline(minimum[i], linestyle="--", color="r", lw=1) @@ -456,15 +548,18 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, elif i > j: xi, yi, zi = partial_dependence(space, result.models[-1], i, j, - rvs_transformed, n_points) + rvs_transformed, n_points, x_eval=x_eval) ax[i, j].contourf(xi, yi, zi, levels, locator=locator, cmap='viridis_r') ax[i, j].scatter(samples[:, j], samples[:, i], c='k', s=10, lw=0.) ax[i, j].scatter(minimum[j], minimum[i], c=['r'], s=20, lw=0.) - - return _format_scatter_plot_axes(ax, space, ylabel="Partial dependence", + if usepartialdependence: + ylabel = "Partial dependence" + else: + ylabel = "Dependence" + return _format_scatter_plot_axes(ax, space, ylabel=ylabel, dim_labels=dimensions) @@ -521,7 +616,7 @@ def plot_evaluations(result, bins=20, dimensions=None): else: bins_ = bins ax[i, i].hist(samples[:, j], bins=bins_, range=None if iscat[j] - else space.dimensions[j].bounds) + else space.dimensions[j].bounds) # lower triangle elif i > j: @@ -592,7 +687,7 @@ def _evenly_sample(dim, n_points): The transformed values of `xi`, for feeding to a model. """ cats = np.array(getattr(dim, 'categories', []), dtype=object) - if len(cats): # Sample categoricals while maintaining order + if len(cats): # Sample categoricals while maintaining order xi = np.linspace(0, len(cats) - 1, min(len(cats), n_points), dtype=int) xi_transformed = dim.transform(cats[xi]) else: @@ -607,3 +702,19 @@ def _cat_format(dimension, x, _): """Categorical axis tick formatter function. Returns the name of category `x` in `dimension`. 
Used with `matplotlib.ticker.FuncFormatter`.""" return str(dimension.categories[int(x)]) + + +def expected_min_random_sampling(model, space, n_samples=100000): + """Minimum search by doing naive random sampling, Returns the parameters + that gave the minimum function value""" + if n_samples > 100000: + n_samples = 100000 + # sample points from search space + random_samples = space.rvs(n_samples=n_samples) + + # make estimations with surrogate + y_random = model.predict(space.transform(random_samples)) + index_best_objective = np.argmin(y_random) + min_x = random_samples[index_best_objective] + + return min_x diff --git a/skopt/space/space.py b/skopt/space/space.py index 3b8a6b0ff..100051310 100644 --- a/skopt/space/space.py +++ b/skopt/space/space.py @@ -902,6 +902,11 @@ def is_categorical(self): """Space contains exclusively categorical dimensions""" return all([isinstance(dim, Categorical) for dim in self.dimensions]) + @property + def is_partly_categorical(self): + """Space contains any categorical dimensions""" + return any([isinstance(dim, Categorical) for dim in self.dimensions]) + def distance(self, point_a, point_b): """Compute distance between two points in this space. From 6fd147bfe1c507dc8e83d55774cf7f50f85e924e Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 4 Feb 2020 10:32:37 +0100 Subject: [PATCH 013/265] Fix issue #831 * Add unit tests to confirm that bug is fixed --- skopt/searchcv.py | 8 ++++++- skopt/tests/test_searchcv.py | 41 +++++++++++++++++++++++++++++++++++- 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/skopt/searchcv.py b/skopt/searchcv.py index 10765047a..54c6faced 100644 --- a/skopt/searchcv.py +++ b/skopt/searchcv.py @@ -379,7 +379,6 @@ def _fit(self, X, y, groups, parameter_iterable): Taken from https://github.com/scikit-learn/scikit-learn/blob/0.18.X .../sklearn/model_selection/_search.py """ - estimator = self.estimator cv = sklearn.model_selection._validation.check_cv( self.cv, y, classifier=is_classifier(estimator)) @@ -570,6 +569,13 @@ def _step(self, X, y, search_space, optimizer, groups=None, n_points=1): for k in self.cv_results_: all_cv_results[k].extend(self.cv_results_[k]) + all_cv_results["rank_test_score"] = list(np.asarray( + rankdata(-np.array(all_cv_results['mean_test_score']), + method='min'), dtype=np.int32)) + if self.return_train_score: + all_cv_results["rank_train_score"] = list(np.asarray( + rankdata(-np.array(all_cv_results['mean_train_score']), + method='min'), dtype=np.int32)) self.cv_results_ = all_cv_results self.best_index_ = np.argmax(self.cv_results_['mean_test_score']) diff --git a/skopt/tests/test_searchcv.py b/skopt/tests/test_searchcv.py index 1df759d1d..1a5e1d7d0 100644 --- a/skopt/tests/test_searchcv.py +++ b/skopt/tests/test_searchcv.py @@ -13,7 +13,9 @@ from sklearn.base import clone from sklearn.base import BaseEstimator from joblib import cpu_count - +from scipy.stats import rankdata +import numpy as np +from numpy.testing import assert_array_equal from skopt.space import Real, Categorical, Integer from skopt import BayesSearchCV @@ -269,6 +271,43 @@ def test_searchcv_reproducibility(): assert getattr(best_est, 'kernel') == getattr(best_est2, 'kernel') +@pytest.mark.fast_test +def test_searchcv_rank(): + """ + Test whether results of BayesSearchCV can be reproduced with a fixed + random state. 
+ """ + + X, y = load_iris(True) + X_train, X_test, y_train, y_test = train_test_split( + X, y, train_size=0.75, random_state=0 + ) + + random_state = 42 + + opt = BayesSearchCV( + SVC(random_state=random_state), + { + 'C': Real(1e-6, 1e+6, prior='log-uniform'), + 'gamma': Real(1e-6, 1e+1, prior='log-uniform'), + 'degree': Integer(1, 8), + 'kernel': Categorical(['linear', 'poly', 'rbf']), + }, + n_iter=11, random_state=random_state, return_train_score=True + ) + + opt.fit(X_train, y_train) + results = opt.cv_results_ + + test_rank = np.asarray(rankdata(-np.array(results["mean_test_score"]), + method='min'), dtype=np.int32) + train_rank = np.asarray(rankdata(-np.array(results["mean_train_score"]), + method='min'), dtype=np.int32) + + assert_array_equal(np.array(results['rank_test_score']), test_rank) + assert_array_equal(np.array(results['rank_train_score']), train_rank) + + def test_searchcv_refit(): """ Test whether results of BayesSearchCV can be reproduced with a fixed From 0e7debb2724a624ba37ebec2b3ee9406d5ffd42c Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 4 Feb 2020 11:54:19 +0100 Subject: [PATCH 014/265] Fix version and release string in doc * Prepare next release --- doc/conf.py | 8 ++++---- doc/whats_new/v0.7.2.rst | 18 ++++++++++++++++++ skopt/__init__.py | 2 +- 3 files changed, 23 insertions(+), 5 deletions(-) create mode 100644 doc/whats_new/v0.7.2.rst diff --git a/doc/conf.py b/doc/conf.py index 780b5dbd8..a1945bfc5 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -19,16 +19,15 @@ import warnings import os import re +from packaging.version import parse # import pkg_resources import sys import skopt sys.path.insert(0, os.path.abspath('sphinxext')) - from github_link import make_linkcode_resolve import sphinx_gallery -__version__ = ".".join(skopt.__version__.split(".")[:2]) # __version__ = pkg_resources.get_distribution('skopt').version on_rtd = os.environ.get('READTHEDOCS', None) == 'True' @@ -39,9 +38,10 @@ author = 'The scikit-optimize contributors' # The short X.Y version -version = __version__ +version = parse(skopt.__version__).base_version +version = ".".join(version.split(".")[:2]) # The full version, including alpha/beta/rc tags -release = __version__ +release = skopt.__version__ # -- General configuration --------------------------------------------------- diff --git a/doc/whats_new/v0.7.2.rst b/doc/whats_new/v0.7.2.rst new file mode 100644 index 000000000..66ed2385a --- /dev/null +++ b/doc/whats_new/v0.7.2.rst @@ -0,0 +1,18 @@ +Version 0.7.2 +============= +**Not released** +New features +------------ + + +Bug fixes +--------- + +* Fix searchcv rank (issue #830) +* Fix random forest regressor (issue #766) + +Maintenance +----------- + +* Fix license detection in github + diff --git a/skopt/__init__.py b/skopt/__init__.py index c0ae1e311..507f5a089 100644 --- a/skopt/__init__.py +++ b/skopt/__init__.py @@ -29,7 +29,7 @@ # Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer. 
# 'X.Y.dev0' is the canonical version of 'X.Y.dev' # -__version__ = "0.7.1" +__version__ = "0.7.2" if __SKOPT_SETUP__: import sys From c984d890b31656cd7581c124f2520faab757e46c Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 4 Feb 2020 12:37:37 +0100 Subject: [PATCH 015/265] Rename pars parameter to eval_min_params Improve plot unit test --- examples/partial-dependence-plot.py | 12 +++++------ skopt/plots.py | 32 +++++++++++++++-------------- skopt/tests/test_plots.py | 6 ++++++ 3 files changed, 29 insertions(+), 21 deletions(-) diff --git a/examples/partial-dependence-plot.py b/examples/partial-dependence-plot.py index 0c405f24a..ac4881581 100644 --- a/examples/partial-dependence-plot.py +++ b/examples/partial-dependence-plot.py @@ -64,29 +64,29 @@ def funny_func(x): # using scipys minimum search method. _ = plot_objective(result, usepartialdependence=False, n_points=10, - pars='expected_minimum') + eval_min_params='expected_minimum') # "expected_minimum_random" is a naive way of finding the minimum of the # surogate by only using random sampling: _ = plot_objective(result, usepartialdependence=False, n_points=10, - pars='expected_minimum_random') + eval_min_params='expected_minimum_random') # Lastly we can also define these parameters ourselfs by parsing a list -# as the pars argument: +# as the eval_min_params argument: _ = plot_objective(result, usepartialdependence=False, - n_points=10, pars=[1, -0.5, 0.5, 0]) + n_points=10, eval_min_params=[1, -0.5, 0.5, 0]) # We can also specify how many intial samples are used for the two different # "expected_minimum" methods. We set it to a low value in the next examples # to showcase how it affects the minimum for the two methods. _ = plot_objective(result, usepartialdependence=False, n_points=10, - pars='expected_minimum_random', + eval_min_params='expected_minimum_random', expected_minimum_samples=10) ############################################################################# _ = plot_objective(result, usepartialdependence=False, n_points=10, - pars='expected_minimum', expected_minimum_samples=1) + eval_min_params='expected_minimum', expected_minimum_samples=1) diff --git a/skopt/plots.py b/skopt/plots.py index 6ce1df092..534f75109 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -352,7 +352,7 @@ def partial_dependence(space, model, i, j=None, sample_points=None, # hot-encoded, so there is a one-to-many mapping of input dimensions # to transformed (model) dimensions. - # If we havent parsed an x_eval list we use random sampled values instead + # If we haven't parsed an x_eval list we use random sampled values instead if x_eval is None: sample_points = space.transform(space.rvs(n_samples=n_samples)) else: @@ -396,7 +396,8 @@ def partial_dependence(space, model, i, j=None, sample_points=None, def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, - zscale='linear', dimensions=None, usepartialdependence=True, pars='result', + zscale='linear', dimensions=None, usepartialdependence=True, + eval_min_params='result', expected_minimum_samples=None): """Pairwise dependence plot of the objective function. @@ -409,7 +410,7 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, Pairwise scatter plots of the points at which the objective function was directly evaluated are shown on the off-diagonal. A red point indicates per default the best observed minimum, but - this can be changed by changing argument ´pars´. + this can be changed by changing argument ´eval_min_params´. 
Parameters ---------- @@ -441,11 +442,11 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, if also `None` to `['X_0', 'X_1', ..]`. usepartialdependence : bool, default=false - Wether to use partial + Whether to use partial dependence or not when calculating dependence. If false plot_objective will parse values to the dependence function, defined by the pars argument - pars : str, default = 'result' or list of floats + eval_min_params : str, default = 'result' or list of floats Defines the values for the red points in the plots, and if partialdependence is false, this argument also defines values for all other parameters when calculating dependence. @@ -468,11 +469,11 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, # These same values will be used for evaluating the plots when calculating dependence. (Unless partial # dependence is to be used instead). space = result.space - if isinstance(pars, str): - if pars == 'result': + if isinstance(eval_min_params, str): + if eval_min_params == 'result': # Using the best observed result x_vals = result.x - elif pars == 'expected_minimum': + elif eval_min_params == 'expected_minimum': if result.space.is_partly_categorical: # space is also categorical raise ValueError('expected_minimum does not support any' @@ -488,7 +489,7 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, x_vals, _ = expected_minimum(result, n_random_starts=20, random_state=None) - elif pars == 'expected_minimum_random': + elif eval_min_params == 'expected_minimum_random': # Do a minimum search by evaluating the function with # n_samples sample values if expected_minimum_samples: @@ -502,15 +503,16 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, x_vals = expected_min_random_sampling(result.models[-1], space, n_samples=10 ** len(result.x)) else: - raise ValueError('Argument ´pars´ must be a valid' + raise ValueError('Argument ´eval_min_params´ must be a valid' 'string (´result´)') - elif isinstance(pars, list): - assert len(pars) == len(result.x), 'Argument ´pars´ of type list must' \ - 'have same length as number of features' + elif isinstance(eval_min_params, list): + assert len(eval_min_params) == len(result.x), 'Argument' \ + '´eval_min_params´ of type list must have same length as' \ + 'number of features' # Using defined x_values - x_vals = pars + x_vals = eval_min_params else: - raise ValueError('Argument ´pars´ must be a string or a list') + raise ValueError('Argument ´eval_min_params´ must be a string or a list') if usepartialdependence: x_eval = None diff --git a/skopt/tests/test_plots.py b/skopt/tests/test_plots.py index 016d63f30..149f0a6af 100644 --- a/skopt/tests/test_plots.py +++ b/skopt/tests/test_plots.py @@ -40,6 +40,12 @@ def objective(params): plots.plot_convergence(res) plots.plot_evaluations(res) plots.plot_objective(res) + plots.plot_objective(res, + eval_min_params='expected_minimum_random') + plots.plot_objective(res, + eval_min_params='expected_minimum') + plots.plot_objective(res, + usepartialdependence=True) plots.plot_regret(res) # TODO: Compare plots to known good results? 
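As a minimal sketch of the renamed API at this point in the series (the objective, bounds and sample counts below are illustrative assumptions; the next patch renames these arguments once more):

from skopt import forest_minimize
from skopt.plots import plot_objective

# A cheap stand-in objective over three continuous dimensions.
bounds = [(-1.0, 1.0)] * 3
result = forest_minimize(lambda x: sum(xi ** 2 for xi in x),
                         bounds, n_calls=60, random_state=0)

# Red markers at the best observed point (the default).
plot_objective(result, usepartialdependence=False, n_points=10,
               eval_min_params='result')

# Gradient-based minimum search on the surrogate model; raises a
# ValueError for search-spaces with Categorical dimensions.
plot_objective(result, usepartialdependence=False, n_points=10,
               eval_min_params='expected_minimum')

# Naive random-sampling search on the surrogate model; also works
# with Categorical dimensions.
plot_objective(result, usepartialdependence=False, n_points=10,
               eval_min_params='expected_minimum_random',
               expected_minimum_samples=1000)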
From 4fea165f5052e0f5963dad750ed5bde7dc38d059 Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 4 Feb 2020 13:44:36 +0100 Subject: [PATCH 016/265] Improve parameter names and documentation --- examples/partial-dependence-plot.py | 25 +++-- skopt/plots.py | 150 +++++++++++++++------------- skopt/tests/test_plots.py | 6 +- 3 files changed, 96 insertions(+), 85 deletions(-) diff --git a/examples/partial-dependence-plot.py b/examples/partial-dependence-plot.py index ac4881581..7ea0676c5 100644 --- a/examples/partial-dependence-plot.py +++ b/examples/partial-dependence-plot.py @@ -49,44 +49,43 @@ def funny_func(x): # predictions for each point on the plots. -_ = plot_objective(result, usepartialdependence=True, n_points=10) +_ = plot_objective(result, samples='result', n_points=10) # Here we plot without partial dependence. We see that it is a lot faster. # Also the values for the other parameters are set to the default "result" # which is the parameter set of the best observed value so far. In the case # of funny_func this is close to 0 for all parameters. -_ = plot_objective(result, usepartialdependence=False, n_points=10) +_ = plot_objective(result, n_points=10) # Here we try with setting the other parameters to something other than # "result". First we try with "expected_minimum" which is the set of # parameters that gives the miniumum value of the surogate function, # using scipys minimum search method. -_ = plot_objective(result, usepartialdependence=False, n_points=10, - eval_min_params='expected_minimum') +_ = plot_objective(result, n_points=10, + miminum='expected_minimum') # "expected_minimum_random" is a naive way of finding the minimum of the # surogate by only using random sampling: -_ = plot_objective(result, usepartialdependence=False, n_points=10, - eval_min_params='expected_minimum_random') +_ = plot_objective(result, n_points=10, + miminum='expected_minimum_random') # Lastly we can also define these parameters ourselfs by parsing a list -# as the eval_min_params argument: +# as the minimum argument: -_ = plot_objective(result, usepartialdependence=False, - n_points=10, eval_min_params=[1, -0.5, 0.5, 0]) +_ = plot_objective(result, n_points=10, minimum=[1, -0.5, 0.5, 0]) # We can also specify how many intial samples are used for the two different # "expected_minimum" methods. We set it to a low value in the next examples # to showcase how it affects the minimum for the two methods. -_ = plot_objective(result, usepartialdependence=False, n_points=10, - eval_min_params='expected_minimum_random', +_ = plot_objective(result, n_points=10, + miminum='expected_minimum_random', expected_minimum_samples=10) ############################################################################# -_ = plot_objective(result, usepartialdependence=False, n_points=10, - eval_min_params='expected_minimum', expected_minimum_samples=1) +_ = plot_objective(result, n_points=10, + miminum='expected_minimum', expected_minimum_samples=1) diff --git a/skopt/plots.py b/skopt/plots.py index 534f75109..4c2758864 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -281,12 +281,15 @@ def _format_scatter_plot_axes(ax, space, ylabel, dim_labels=None): def partial_dependence(space, model, i, j=None, sample_points=None, n_samples=250, n_points=40, x_eval=None): - """Calculate the (partial-) dependence for dimensions `i` and `j` with + """Calculate the partial dependence for dimensions `i` and `j` with respect to the objective value, as approximated by `model`. 
- If x_eval is set to "None", the partial dependence will be calculated. - The dependence plot shows how the value of the dimensions - `i` and `j` influence the `model`. + The partial dependence plot shows how the value of the dimensions + `i` and `j` influence the `model` predictions after "averaging out" + the influence of all other dimensions. + + When `x_eval` is not `None`, the given values are used instead of + random samples. In this case, `n_samples` will be ignored. Parameters ---------- @@ -320,9 +323,9 @@ def partial_dependence(space, model, i, j=None, sample_points=None, along each dimension `i` and `j`. x_eval : list, default=None - x_eval is a list of parameter values or None. In case `x_eval` + `x_eval` is a list of parameter values or None. In case `x_eval` is not None, the parsed dependence will be calculated using these values. - Otherwise, the partial dependence is calculated. + Otherwise, random selected samples will be used. Returns ------- @@ -353,9 +356,9 @@ def partial_dependence(space, model, i, j=None, sample_points=None, # to transformed (model) dimensions. # If we haven't parsed an x_eval list we use random sampled values instead - if x_eval is None: + if x_eval is None and sample_points is None: sample_points = space.transform(space.rvs(n_samples=n_samples)) - else: + elif sample_points is None: sample_points = space.transform([x_eval]) # dim_locs[i] is the (column index of the) start of dim i in sample_points. @@ -396,9 +399,8 @@ def partial_dependence(space, model, i, j=None, sample_points=None, def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, - zscale='linear', dimensions=None, usepartialdependence=True, - eval_min_params='result', - expected_minimum_samples=None): + zscale='linear', dimensions=None, samples='random', + minimum='result', expected_minimum_samples=None): """Pairwise dependence plot of the objective function. The diagonal shows the dependence for dimension `i` with @@ -441,14 +443,20 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, variables. `None` defaults to `space.dimensions[i].name`, or if also `None` to `['X_0', 'X_1', ..]`. - usepartialdependence : bool, default=false - Whether to use partial - dependence or not when calculating dependence. If false plot_objective - will parse values to the dependence function, defined by the pars argument + samples : str or list of floats, default='random' + Defines to samples generation to use for averaging the model function + at each of the `n_points`. + Valid strings: 'random' - `n_random` samples will used + 'result' - Use best observed parameters + 'expected_minimum' - Parameters that gives the best minimum + Calculated using scipy's minimize method. This method + currently does not work with categorical values. + 'expected_minimum_random' - Parameters that gives the best minimum + when using naive random sampling. Works with categorical values - eval_min_params : str, default = 'result' or list of floats + minimum : str or list of floats, default = 'result' Defines the values for the red - points in the plots, and if partialdependence is false, this argument also + points in the plots, and if `partialdependence` is false, this argument also defines values for all other parameters when calculating dependence. 
Valid strings: 'result' - Use best observed parameters 'expected_minimum' - Parameters that gives the best minimum @@ -456,7 +464,8 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, currently does not work with categorical values. 'expected_minimum_random' - Parameters that gives the best minimum when using naive random sampling. Works with categorical values - expected_minimum_samples : float, default = None + + expected_minimum_samples : int, default = None Determines how many points should be evaluated to find the minimum when using 'expected_minimum' or 'expected_minimum_random' @@ -469,55 +478,11 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, # These same values will be used for evaluating the plots when calculating dependence. (Unless partial # dependence is to be used instead). space = result.space - if isinstance(eval_min_params, str): - if eval_min_params == 'result': - # Using the best observed result - x_vals = result.x - elif eval_min_params == 'expected_minimum': - if result.space.is_partly_categorical: - # space is also categorical - raise ValueError('expected_minimum does not support any' - 'categorical values') - # Do a gradient based minimum search using scipys own minimizer - if expected_minimum_samples: - # If a value for - # expected_minimum_samples has been parsed - x_vals, _ = expected_minimum(result, - n_random_starts=expected_minimum_samples, - random_state=None) - else: # Use standard of 20 random starting points - x_vals, _ = expected_minimum(result, - n_random_starts=20, - random_state=None) - elif eval_min_params == 'expected_minimum_random': - # Do a minimum search by evaluating the function with - # n_samples sample values - if expected_minimum_samples: - # If a value for - # expected_minimum_samples has been parsed - x_vals = expected_min_random_sampling(result.models[-1], space, - n_samples=expected_minimum_samples) - else: - # Use standard of 10^n_parameters. Note this - # becomes very slow for many parameters - x_vals = expected_min_random_sampling(result.models[-1], space, - n_samples=10 ** len(result.x)) - else: - raise ValueError('Argument ´eval_min_params´ must be a valid' - 'string (´result´)') - elif isinstance(eval_min_params, list): - assert len(eval_min_params) == len(result.x), 'Argument' \ - '´eval_min_params´ of type list must have same length as' \ - 'number of features' - # Using defined x_values - x_vals = eval_min_params - else: - raise ValueError('Argument ´eval_min_params´ must be a string or a list') - - if usepartialdependence: + x_vals = evaluate_min_params(result, minimum, expected_minimum_samples) + if samples == "random": x_eval = None else: - x_eval = x_vals + x_eval = evaluate_min_params(result, samples, expected_minimum_samples) rvs_transformed = space.transform(space.rvs(n_samples=n_samples)) samples, minimum, _ = _map_categories(space, result.x_iters, x_vals) @@ -557,10 +522,7 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, c='k', s=10, lw=0.) ax[i, j].scatter(minimum[j], minimum[i], c=['r'], s=20, lw=0.) 
- if usepartialdependence: - ylabel = "Partial dependence" - else: - ylabel = "Dependence" + ylabel = "Partial dependence" return _format_scatter_plot_axes(ax, space, ylabel=ylabel, dim_labels=dimensions) @@ -720,3 +682,53 @@ def expected_min_random_sampling(model, space, n_samples=100000): min_x = random_samples[index_best_objective] return min_x + + +def evaluate_min_params(result, params='result', expected_minimum_samples=None): + x_vals = None + space = result.space + if isinstance(params, str): + if params == 'result': + # Using the best observed result + x_vals = result.x + elif params == 'expected_minimum': + if result.space.is_partly_categorical: + # space is also categorical + raise ValueError('expected_minimum does not support any' + 'categorical values') + # Do a gradient based minimum search using scipys own minimizer + if expected_minimum_samples: + # If a value for + # expected_minimum_samples has been parsed + x_vals, _ = expected_minimum(result, + n_random_starts=expected_minimum_samples, + random_state=None) + else: # Use standard of 20 random starting points + x_vals, _ = expected_minimum(result, + n_random_starts=20, + random_state=None) + elif params == 'expected_minimum_random': + # Do a minimum search by evaluating the function with + # n_samples sample values + if expected_minimum_samples: + # If a value for + # expected_minimum_samples has been parsed + x_vals = expected_min_random_sampling(result.models[-1], space, + n_samples=expected_minimum_samples) + else: + # Use standard of 10^n_parameters. Note this + # becomes very slow for many parameters + x_vals = expected_min_random_sampling(result.models[-1], space, + n_samples=10 ** len(result.x)) + else: + raise ValueError('Argument ´eval_min_params´ must be a valid' + 'string (´result´)') + elif isinstance(params, list): + assert len(params) == len(result.x), 'Argument' \ + '´eval_min_params´ of type list must have same length as' \ + 'number of features' + # Using defined x_values + x_vals = params + else: + raise ValueError('Argument ´eval_min_params´ must be a string or a list') + return x_vals diff --git a/skopt/tests/test_plots.py b/skopt/tests/test_plots.py index 149f0a6af..cb2107d39 100644 --- a/skopt/tests/test_plots.py +++ b/skopt/tests/test_plots.py @@ -41,11 +41,11 @@ def objective(params): plots.plot_evaluations(res) plots.plot_objective(res) plots.plot_objective(res, - eval_min_params='expected_minimum_random') + minimum='expected_minimum_random') plots.plot_objective(res, - eval_min_params='expected_minimum') + minimum='expected_minimum') plots.plot_objective(res, - usepartialdependence=True) + samples='result') plots.plot_regret(res) # TODO: Compare plots to known good results? From 6b67bc3349149e5736606861509347047afc0a00 Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 4 Feb 2020 13:54:29 +0100 Subject: [PATCH 017/265] Fixes and doc improvements --- examples/partial-dependence-plot.py | 12 ++--- skopt/plots.py | 78 +++++++++++++++++------------ 2 files changed, 53 insertions(+), 37 deletions(-) diff --git a/examples/partial-dependence-plot.py b/examples/partial-dependence-plot.py index 7ea0676c5..7006f477b 100644 --- a/examples/partial-dependence-plot.py +++ b/examples/partial-dependence-plot.py @@ -56,21 +56,21 @@ def funny_func(x): # which is the parameter set of the best observed value so far. In the case # of funny_func this is close to 0 for all parameters. 
From 6b67bc3349149e5736606861509347047afc0a00 Mon Sep 17 00:00:00 2001
From: holgern
Date: Tue, 4 Feb 2020 13:54:29 +0100
Subject: [PATCH 017/265] Fixes and doc improvements

---
 examples/partial-dependence-plot.py | 12 ++---
 skopt/plots.py                      | 78 +++++++++++++++++------------
 2 files changed, 53 insertions(+), 37 deletions(-)

diff --git a/examples/partial-dependence-plot.py b/examples/partial-dependence-plot.py
index 7ea0676c5..7006f477b 100644
--- a/examples/partial-dependence-plot.py
+++ b/examples/partial-dependence-plot.py
@@ -56,21 +56,21 @@ def funny_func(x):
 # which is the parameter set of the best observed value so far. In the case
 # of funny_func this is close to 0 for all parameters.

-_ = plot_objective(result, n_points=10)
+_ = plot_objective(result, n_points=10)

 # Here we try with setting the other parameters to something other than
 # "result". First we try with "expected_minimum" which is the set of
 # parameters that gives the miniumum value of the surogate function,
 # using scipys minimum search method.

-_ = plot_objective(result, n_points=10,
-                   miminum='expected_minimum')
+_ = plot_objective(result, n_points=10,
+                   mininum='expected_minimum')

 # "expected_minimum_random" is a naive way of finding the minimum of the
 # surogate by only using random sampling:

 _ = plot_objective(result, n_points=10,
-                   miminum='expected_minimum_random')
+                   mininum='expected_minimum_random')

 # Lastly we can also define these parameters ourselfs by parsing a list
 # as the minimum argument:
@@ -82,10 +82,10 @@ def funny_func(x):
 # to showcase how it affects the minimum for the two methods.

 _ = plot_objective(result, n_points=10,
-                   miminum='expected_minimum_random',
+                   mininum='expected_minimum_random',
                    expected_minimum_samples=10)

 #############################################################################

 _ = plot_objective(result, n_points=10,
-                   miminum='expected_minimum', expected_minimum_samples=1)
+                   mininum='expected_minimum', expected_minimum_samples=1)
diff --git a/skopt/plots.py b/skopt/plots.py
index 4c2758864..465c54ddd 100644
--- a/skopt/plots.py
+++ b/skopt/plots.py
@@ -315,7 +315,7 @@ def partial_dependence(space, model, i, j=None, sample_points=None,
     n_samples : int, default=100
         Number of random samples to use for averaging the model function
-        at each of the `n_points` when using partial dependence. Only used
+        at each of the `n_points` when using partial dependence. Only used
         when `sample_points=None` and `x_eval=None`.

     n_points : int, default=40
@@ -324,7 +324,8 @@ def partial_dependence(space, model, i, j=None, sample_points=None,
     x_eval : list, default=None
         `x_eval` is a list of parameter values or None. In case `x_eval`
-        is not None, the parsed dependence will be calculated using these values.
+        is not None, the partial dependence will be calculated using these
+        values.
         Otherwise, randomly selected samples will be used.

     Returns
@@ -350,7 +351,8 @@ def partial_dependence(space, model, i, j=None, sample_points=None,
         the indices of the variable in `Dimension.categories`.
     """
     # The idea is to step through one dimension and evaluate the model with
-    # that dimension fixed. (Or step through 2 dimensions when i and j are given.)
+    # that dimension fixed. (Or step through 2 dimensions when i and j are
+    # given.)
     # Categorical dimensions make this interesting, because they are one-
     # hot-encoded, so there is a one-to-many mapping of input dimensions
     # to transformed (model) dimensions.
@@ -361,8 +363,10 @@ def partial_dependence(space, model, i, j=None, sample_points=None,
     elif sample_points is None:
         sample_points = space.transform([x_eval])

-    # dim_locs[i] is the (column index of the) start of dim i in sample_points.
-    # This is usefull when we are using one hot encoding, i.e using categorical values
+    # dim_locs[i] is the (column index of the) start of dim i in
+    # sample_points.
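+    # (e.g. for dimensions [Real, Categorical(3 categories), Integer] the
+    # one-hot encoding gives dim_locs == [0, 1, 4, 5])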
+    # This is useful when we are using one hot encoding, i.e. when using
+    # categorical values
     dim_locs = np.cumsum([0] + [d.transformed_size for d in space.dimensions])

     if j is None:
@@ -372,7 +376,8 @@
         yi = []
         for x_ in xi_transformed:
             rvs_ = np.array(sample_points)  # copy
-            # We replace the values in the dimension that we want to keep fixed
+            # We replace the values in the dimension that we want to keep
+            # fixed
             rvs_[:, dim_locs[i]:dim_locs[i + 1]] = x_
             # In case of `x_eval=None` rvs consists of random samples.
             # Calculating the mean of these samples is how partial dependence
@@ -403,16 +408,16 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2,
                    minimum='result', expected_minimum_samples=None):
     """Pairwise dependence plot of the objective function.

-    The diagonal shows the dependence for dimension `i` with
+    The diagonal shows the partial dependence for dimension `i` with
     respect to the objective function. The off-diagonal shows the
-    dependence for dimensions `i` and `j` with
+    partial dependence for dimensions `i` and `j` with
     respect to the objective function.
     The objective function is approximated by `result.model.`

     Pairwise scatter plots of the points at which the objective
     function was directly evaluated are shown on the off-diagonal.
     A red point indicates by default the best observed minimum, but
-    this can be changed by changing argument ´eval_min_params´.
+    this can be changed by changing argument ´minimum´.

     Parameters
     ----------
@@ -448,41 +453,47 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2,
         at each of the `n_points`. Valid strings:
             'random' - `n_random` samples will be used
             'result' - Use best observed parameters
-            'expected_minimum' - Parameters that gives the best minimum
-                Calculated using scipy's minimize method. This method
-                currently does not work with categorical values.
-            'expected_minimum_random' - Parameters that gives the best minimum
-                when using naive random sampling. Works with categorical values
+            'expected_minimum' - Parameters that give the best
+                minimum, calculated using scipy's minimize method.
+                This method currently does not work with
+                categorical values.
+            'expected_minimum_random' - Parameters that give the
+                best minimum when using naive random sampling.
+                Works with categorical values

     minimum : str or list of floats, default = 'result'
-        Defines the values for the red
-        points in the plots, and if `partialdependence` is false, this argument also
-        defines values for all other parameters when calculating dependence.
+        Defines the values for the red points in the plots.
         Valid strings:
             'result' - Use best observed parameters
-            'expected_minimum' - Parameters that gives the best minimum
-                Calculated using scipy's minimize method. This method
-                currently does not work with categorical values.
-            'expected_minimum_random' - Parameters that gives the best minimum
-                when using naive random sampling. Works with categorical values
+            'expected_minimum' - Parameters that give the best
+                minimum, calculated using scipy's minimize method.
+                This method currently does not work with
+                categorical values.
+            'expected_minimum_random' - Parameters that give the
+                best minimum when using naive random sampling.
+                Works with categorical values

     expected_minimum_samples : int, default = None
         Determines how many points should be evaluated
-        to find the minimum when using 'expected_minimum' or 'expected_minimum_random'
+        to find the minimum when using 'expected_minimum' or
+        'expected_minimum_random'

     Returns
     -------
     ax : `Axes`
         The matplotlib axes.
     """
-    # Here we define the values for which to plot the red dot (2d plot) and the red dotted line (1d plot).
-    # These same values will be used for evaluating the plots when calculating dependence. (Unless partial
+    # Here we define the values for which to plot the red dot (2d plot) and
+    # the red dotted line (1d plot).
+    # These same values will be used for evaluating the plots when
+    # calculating dependence. (Unless partial
     # dependence is to be used instead).
     space = result.space
     x_vals = evaluate_min_params(result, minimum, expected_minimum_samples)
     if samples == "random":
         x_eval = None
     else:
-        x_eval = evaluate_min_params(result, samples, expected_minimum_samples)
+        x_eval = evaluate_min_params(result, samples,
+                                     expected_minimum_samples)
     rvs_transformed = space.transform(space.rvs(n_samples=n_samples))
     samples, minimum, _ = _map_categories(space, result.x_iters, x_vals)
@@ -515,7 +526,8 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2,
             elif i > j:
                 xi, yi, zi = partial_dependence(space, result.models[-1],
                                                 i, j,
-                                                rvs_transformed, n_points, x_eval=x_eval)
+                                                rvs_transformed, n_points,
+                                                x_eval=x_eval)
                 ax[i, j].contourf(xi, yi, zi, levels,
                                   locator=locator, cmap='viridis_r')
                 ax[i, j].scatter(samples[:, j], samples[:, i],
@@ -652,7 +664,8 @@ def _evenly_sample(dim, n_points):
     """
     cats = np.array(getattr(dim, 'categories', []), dtype=object)
     if len(cats):  # Sample categoricals while maintaining order
-        xi = np.linspace(0, len(cats) - 1, min(len(cats), n_points), dtype=int)
+        xi = np.linspace(0, len(cats) - 1, min(len(cats), n_points),
+                         dtype=int)
         xi_transformed = dim.transform(cats[xi])
     else:
         bounds = dim.bounds
@@ -684,7 +697,8 @@ def expected_min_random_sampling(model, space, n_samples=100000):
     return min_x


-def evaluate_min_params(result, params='result', expected_minimum_samples=None):
+def evaluate_min_params(result, params='result',
+                        expected_minimum_samples=None):
     x_vals = None
     space = result.space
     if isinstance(params, str):
@@ -713,7 +727,8 @@ def evaluate_min_params(result, params='result', expected_minimum_samples=None):
         if expected_minimum_samples:
             # If a value for
             # expected_minimum_samples has been passed
-            x_vals = expected_min_random_sampling(result.models[-1], space,
+            x_vals = expected_min_random_sampling(result.models[-1],
+                                                  space,
                                                   n_samples=expected_minimum_samples)
         else:
             # Use standard of 10^n_parameters. Note this
             # becomes very slow for many parameters
@@ -730,5 +745,6 @@ def evaluate_min_params(result, params='result', expected_minimum_samples=None):
         # Using defined x_values
         x_vals = params
     else:
-        raise ValueError('Argument ´params´ must be a string or a list')
+        raise ValueError('Argument ´params´ must '
+                         'be a string or a list')
     return x_vals

From 97d02771e4b9139253569478ceb604880c08acfe Mon Sep 17 00:00:00 2001
From: holgern
Date: Tue, 4 Feb 2020 13:55:45 +0100
Subject: [PATCH 018/265] Fix unit test

---
 skopt/tests/test_plots.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/skopt/tests/test_plots.py b/skopt/tests/test_plots.py
index cb2107d39..a309de97c 100644
--- a/skopt/tests/test_plots.py
+++ b/skopt/tests/test_plots.py
@@ -42,8 +42,6 @@ def objective(params):
     plots.plot_objective(res)
     plots.plot_objective(res,
                          minimum='expected_minimum_random')
-    plots.plot_objective(res,
-                         minimum='expected_minimum')
     plots.plot_objective(res,
                          samples='result')
     plots.plot_regret(res)

From 481f9acf1377aeeb896289bcce678f5bfeaae11b Mon Sep 17 00:00:00 2001
From: holgern
Date: Tue, 4 Feb 2020 14:20:14 +0100
Subject: [PATCH 019/265] Improve example

---
 examples/partial-dependence-plot.py | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/examples/partial-dependence-plot.py b/examples/partial-dependence-plot.py
index 7006f477b..f6058ebab 100644
--- a/examples/partial-dependence-plot.py
+++ b/examples/partial-dependence-plot.py
@@ -49,43 +49,48 @@ def funny_func(x):
 # predictions for each point on the plots.


-_ = plot_objective(result, samples='result', n_points=10)
+_ = plot_objective(result, n_points=10)

+#############################################################################
 # Here we plot without partial dependence. We see that it is a lot faster.
 # Also the values for the other parameters are set to the default "result"
 # which is the parameter set of the best observed value so far. In the case
 # of funny_func this is close to 0 for all parameters.

-_ = plot_objective(result, n_points=10)
+_ = plot_objective(result, samples='result', n_points=10)

+#############################################################################
 # Here we try with setting the `minimum` parameters to something other than
 # "result". First we try with "expected_minimum" which is the set of
 # parameters that gives the miniumum value of the surrogate function,
 # using scipys minimum search method.

 _ = plot_objective(result, n_points=10,
-                   mininum='expected_minimum')
+                   minimum='expected_minimum')

+#############################################################################
 # "expected_minimum_random" is a naive way of finding the minimum of the
 # surrogate by only using random sampling:

 _ = plot_objective(result, n_points=10,
-                   mininum='expected_minimum_random')
+                   minimum='expected_minimum_random')

+#############################################################################
 # Lastly we can also define these parameters ourselfs by parsing a list
 # as the minimum argument:

 _ = plot_objective(result, n_points=10, minimum=[1, -0.5, 0.5, 0])

+#############################################################################
 # We can also specify how many intial samples are used for the two different
 # "expected_minimum" methods.
We set it to a low value in the next examples # to showcase how it affects the minimum for the two methods. _ = plot_objective(result, n_points=10, - mininum='expected_minimum_random', + minimum='expected_minimum_random', expected_minimum_samples=10) ############################################################################# _ = plot_objective(result, n_points=10, - mininum='expected_minimum', expected_minimum_samples=1) + minimum='expected_minimum', expected_minimum_samples=1) From dae08dde8d22c9907daac47ed9259df9a91dbf6a Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 4 Feb 2020 16:29:23 +0100 Subject: [PATCH 020/265] Improve example --- examples/partial-dependence-plot.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/partial-dependence-plot.py b/examples/partial-dependence-plot.py index f6058ebab..68803e217 100644 --- a/examples/partial-dependence-plot.py +++ b/examples/partial-dependence-plot.py @@ -37,12 +37,13 @@ def funny_func(x): ############################################################################# # We run forest_minimize on the function -bounds = [(-1, 1.), ] * 4 +bounds = [(-1, 1.), ] * 3 n_calls = 150 result = forest_minimize(funny_func, bounds, n_calls=n_calls, base_estimator="ET", random_state=4) +############################################################################# # Here we see an example of using partial dependence. Even when setting # n_points all the way down to 10 from the default of 40, this method is # still very slow. This is because partial dependence calculates 250 extra @@ -79,7 +80,7 @@ def funny_func(x): # Lastly we can also define these parameters ourselfs by parsing a list # as the minimum argument: -_ = plot_objective(result, n_points=10, minimum=[1, -0.5, 0.5, 0]) +_ = plot_objective(result, n_points=10, minimum=[1, -0.5, 0.5]) ############################################################################# # We can also specify how many intial samples are used for the two different From 31fc14e18017a99b2e01de127ba9b00faa1254bd Mon Sep 17 00:00:00 2001 From: holgern Date: Thu, 6 Feb 2020 15:29:09 +0100 Subject: [PATCH 021/265] Add IntegerEncoder and add function to change transformer for all Dimensions * Add unittests for IntegerEncoder --- skopt/space/space.py | 132 ++++++++++++++++++++++++++----- skopt/space/transformers.py | 57 +++++++++++++ skopt/tests/test_space.py | 113 +++++++++++++++++++------- skopt/tests/test_transformers.py | 65 +++++++++++++++ 4 files changed, 316 insertions(+), 51 deletions(-) diff --git a/skopt/space/space.py b/skopt/space/space.py index 3b8a6b0ff..f82e296f9 100644 --- a/skopt/space/space.py +++ b/skopt/space/space.py @@ -11,6 +11,7 @@ from .transformers import CategoricalEncoder from .transformers import StringEncoder +from .transformers import IntegerEncoder from .transformers import Normalize from .transformers import Identity from .transformers import LogN @@ -149,6 +150,9 @@ def inverse_transform(self, Xt): """ return self.transformer.inverse_transform(Xt) + def set_transformer(self): + raise NotImplementedError + @property def size(self): return 1 @@ -165,6 +169,10 @@ def bounds(self): def transformed_bounds(self): raise NotImplementedError + @property + def transformed_bounds(self): + raise NotImplementedError + @property def name(self): return self._name @@ -234,6 +242,9 @@ def __init__(self, low, high, prior="uniform", base=10, transform=None, self.log_base = np.log10(base) self.name = name self.dtype = dtype + self._rvs = None + self.transformer = None + 
self.transform_ = transform
         if isinstance(self.dtype, str) and self.dtype\
                 not in ['float', 'float16', 'float32', 'float64']:
             raise ValueError("dtype must be 'float', 'float16', 'float32'"
@@ -246,14 +257,23 @@ def __init__(self, low, high, prior="uniform", base=10, transform=None,
         if transform is None:
             transform = "identity"

+        self.set_transformer(transform)
+
+    def set_transformer(self, transform="identity"):
+        """Define _rvs and transformer spaces.
+
+        Parameters
+        ----------
+        transform : str
+           Can be 'normalize' or 'identity'
+
+        """
         self.transform_ = transform

         if self.transform_ not in ["normalize", "identity"]:
             raise ValueError("transform should be 'normalize' or 'identity'"
                              " got {}".format(self.transform_))

-        # Define _rvs and transformer spaces.
         # XXX: The _rvs is for sampling in the transformed space.
         # The rvs on Dimension calls inverse_transform on the points sampled
         # using _rvs
@@ -263,12 +283,12 @@ def __init__(self, low, high, prior="uniform", base=10, transform=None,
             self._rvs = _uniform_inclusive(0., 1.)
             if self.prior == "uniform":
                 self.transformer = Pipeline(
-                    [Identity(), Normalize(low, high)])
+                    [Identity(), Normalize(self.low, self.high)])
             else:
                 self.transformer = Pipeline(
                     [LogN(self.base),
-                     Normalize(np.log10(low) / self.log_base,
-                               np.log10(high) / self.log_base)]
+                     Normalize(np.log10(self.low) / self.log_base,
+                               np.log10(self.high) / self.log_base)]
                 )
         else:
             if self.prior == "uniform":
@@ -397,6 +417,10 @@ def __init__(self, low, high, prior="uniform", base=10, transform=None,
         self.log_base = np.log10(base)
         self.name = name
         self.dtype = dtype
+        self.transform_ = transform
+        self._rvs = None
+        self.transformer = None
+
         if isinstance(self.dtype, str) and self.dtype\
                 not in ['int', 'int8', 'int16', 'int32', 'int64',
                         'uint8', 'uint16', 'uint32', 'uint64']:
@@ -414,7 +438,17 @@ def __init__(self, low, high, prior="uniform", base=10, transform=None,
         if transform is None:
             transform = "identity"

+        self.set_transformer(transform)

+    def set_transformer(self, transform="identity"):
+        """Define _rvs and transformer spaces.
+
+        Parameters
+        ----------
+        transform : str
+           Can be 'normalize' or 'identity'
+
+        """
         self.transform_ = transform

         if transform not in ["normalize", "identity"]:
@@ -425,13 +459,13 @@ def __init__(self, low, high, prior="uniform", base=10, transform=None,
             self._rvs = _uniform_inclusive(0.0, 1.0)
             if self.prior == "uniform":
                 self.transformer = Pipeline(
-                    [Identity(), Normalize(low, high)])
+                    [Identity(), Normalize(self.low, self.high)])
             else:
                 self.transformer = Pipeline(
                     [LogN(self.base),
-                     Normalize(np.log10(low) / self.log_base,
-                               np.log10(high) / self.log_base)]
+                     Normalize(np.log10(self.low) / self.log_base,
+                               np.log10(self.high) / self.log_base)]
                 )
         else:
             if self.prior == "uniform":
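In use, the deferred transformer setup introduced above works as sketched here (illustrative values only; the 0.5 follows from log10(10) lying halfway between log10(1) and log10(100)):

    from skopt.space import Real

    dim = Real(1.0, 100.0, prior='log-uniform')  # transform defaults to 'identity'
    dim.set_transformer('normalize')             # re-binds _rvs and the transformer
    print(dim.transform([10.0]))                 # [0.5]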
@@ -533,7 +567,28 @@ def __init__(self, categories, prior=None, transform=None, name=None):
         if transform is None:
             transform = "onehot"
         self.transform_ = transform
-        if transform not in ["identity", "onehot", "string"]:
+        self.transformer = None
+        self._rvs = None
+        self.prior = prior
+
+        if prior is None:
+            self.prior_ = np.tile(1. / len(self.categories),
+                                  len(self.categories))
+        else:
+            self.prior_ = prior
+        self.set_transformer(transform)
+
+    def set_transformer(self, transform="onehot"):
+        """Define _rvs and transformer spaces.
+
+        Parameters
+        ----------
+        transform : str
+           Can be 'normalize' or 'identity'
+
+        """
+        self.transform_ = transform
+        if transform not in ["identity", "onehot", "string", "normalize"]:
             raise ValueError("Expected transform to be 'identity', 'string' or"
                              "'onehot' got {}".format(transform))
         if transform == "onehot":
             self.transformer = CategoricalEncoder()
             self.transformer.fit(self.categories)
         elif transform == "string":
             self.transformer = StringEncoder()
             self.transformer.fit(self.categories)
+        elif transform == "normalize":
+            self._rvs = _uniform_inclusive(0.0, 1.0)
+            self.transformer = Pipeline(
+                [IntegerEncoder(list(self.categories)), Normalize(0, len(self.categories) - 1)])
         else:
             self.transformer = Identity()
             self.transformer.fit(self.categories)
-
-        self.prior = prior
-
-        if prior is None:
-            self.prior_ = np.tile(1. / len(self.categories),
-                                  len(self.categories))
-        else:
-            self.prior_ = prior
-
-        # XXX check that sum(prior) == 1
-        self._rvs = rv_discrete(
-            values=(range(len(self.categories)), self.prior_)
-        )
+        if transform != "normalize":
+            # XXX check that sum(prior) == 1
+            self._rvs = rv_discrete(
+                values=(range(len(self.categories)), self.prior_)
+            )

     def __eq__(self, other):
         return (type(self) is type(other) and
@@ -577,11 +628,26 @@ def __repr__(self):

         return "Categorical(categories={}, prior={})".format(cats, prior)

+    def inverse_transform(self, Xt):
+        """Inverse transform samples from the warped space back into the
+        original space.
+        """
+        # The concatenation of all transformed dimensions makes Xt to be
+        # of type float, hence the conversion back to the original categories.
+        inv_transform = super(Categorical, self).inverse_transform(Xt)
+        if isinstance(inv_transform, list):
+            inv_transform = np.array(inv_transform)
+        return inv_transform
+
     def rvs(self, n_samples=None, random_state=None):
         choices = self._rvs.rvs(size=n_samples, random_state=random_state)

         if isinstance(choices, numbers.Integral):
             return self.categories[choices]
+        elif self.transform_ == "normalize" and isinstance(choices, float):
+            return self.inverse_transform([(choices)])
+        elif self.transform_ == "normalize":
+            return self.inverse_transform(list(choices))
         else:
             return [self.categories[c] for c in choices]
@@ -778,6 +844,30 @@ def rvs(self, n_samples=1, random_state=None):

         return rows

+    def set_transformer(self, transform):
+        """Sets the transformer of all dimension objects to `transform`
+
+        Parameters
+        ----------
+        transform : str or list of str
+            Sets all transformers, when `transform` is a string.
+            Otherwise, transform must be a list of strings with
+            the same length as `dimensions`
+        """
+        # Transform
+        for j in range(self.n_dims):
+            if isinstance(transform, list):
+                self.dimensions[j].set_transformer(transform[j])
+            else:
+                self.dimensions[j].set_transformer(transform)
+
+    def get_transformer(self):
+        """Returns all transformers as list"""
+        transformer = []
+        for j in range(self.n_dims):
+            transformer.append(self.dimensions[j].transform_)
+        return transformer
+
     def transform(self, X):
         """Transform samples from the original space into a warped space.
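Round-trip behaviour of the encoder added in the next file, as a sketch (expected values follow from the mapping built in `fit`; inverse values are rounded to the nearest index):

    from skopt.space.transformers import IntegerEncoder

    enc = IntegerEncoder(['a', 'b', 'c'])
    print(enc.transform(['c', 'a']))          # [2, 0]
    print(enc.inverse_transform([0.2, 0.9]))  # ['a', 'b']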
diff --git a/skopt/space/transformers.py b/skopt/space/transformers.py
index dd7512d76..2f24aa6b9 100644
--- a/skopt/space/transformers.py
+++ b/skopt/space/transformers.py
@@ -151,6 +151,63 @@ def inverse_transform(self, Xt):
         ]


+class IntegerEncoder(Transformer):
+    """IntegerEncoder that can handle categorical variables."""
+    def __init__(self, X=None):
+        if X is not None:
+            self.fit(X)
+
+    def fit(self, X):
+        """Fit a list or array of categories.
+
+        Parameters
+        ----------
+        X : array-like, shape=(n_categories,)
+            List of categories.
+        """
+        self.mapping_ = {v: i for i, v in enumerate(X)}
+        self.inverse_mapping_ = {i: v for v, i in self.mapping_.items()}
+        return self
+
+    def transform(self, X):
+        """Transform an array of categories to an integer-encoded representation.
+
+        Parameters
+        ----------
+        X : array-like, shape=(n_samples,)
+            List of categories.
+
+        Returns
+        -------
+        Xt : array-like, shape=(n_samples,)
+            The integer categories.
+        """
+        X = np.asarray(X)
+        return [self.mapping_[v] for v in X]
+
+    def inverse_transform(self, Xt):
+        """Inverse transform integer categories back to their original
+        representation.
+
+        Parameters
+        ----------
+        Xt : array-like, shape=(n_samples,)
+            Integer categories.
+
+        Returns
+        -------
+        X : array-like, shape=(n_samples,)
+            The original categories.
+        """
+        if isinstance(Xt, (float, np.float64)):
+            Xt = [Xt]
+        else:
+            Xt = np.asarray(Xt)
+        return [
+            self.inverse_mapping_[int(np.round(i))] for i in Xt
+        ]
+
+
 class Normalize(Transformer):
     """
     Scales each dimension into the interval [0, 1].
diff --git a/skopt/tests/test_space.py b/skopt/tests/test_space.py
index 4cfbd1088..83021d585 100644
--- a/skopt/tests/test_space.py
+++ b/skopt/tests/test_space.py
@@ -366,8 +366,35 @@ def test_space_from_space():

     assert_equal(space, space2)


+@pytest.mark.fast_test
+def test_set_get_transformer():
+    # check that per-dimension transformers can be read back and
+    # replaced for all dimensions at once
+    space = Space([(0.0, 1.0), (-5, 5),
+                   ("a", "b", "c"), (1.0, 5.0, "log-uniform"), ("e", "f")])
+
+    transformer = space.get_transformer()
+    assert_array_equal(["identity", "identity", "onehot", "identity", "onehot"], transformer)
+    space.set_transformer("normalize")
+    transformer = space.get_transformer()
+    assert_array_equal(["normalize"] * 5, transformer)
+
+
 @pytest.mark.fast_test
 def test_normalize():
+    # check that a mixed space can be normalized and that the
+    # transformation round-trips
+ space = Space([(0.0, 1.0), (-5, 5), + ("a", "b", "c"), (1.0, 5.0, "log-uniform"), ("e", "f")]) + space.set_transformer("normalize") + X = [[0., -5, 'a', 1., 'e']] + Xt = np.zeros((1, 5)) + assert_array_equal(space.transform(X), Xt) + assert_array_equal(space.inverse_transform(Xt), X) + assert_array_equal(space.inverse_transform(space.transform(X)), X) + + +@pytest.mark.fast_test +def test_normalize_real(): + a = Real(2.0, 30.0, transform="normalize") for i in range(50): check_limits(a.rvs(random_state=i), 2, 30) @@ -398,6 +425,42 @@ def test_normalize(): # Check inverse transform assert_array_almost_equal(a.inverse_transform(a.transform(X)), X) + a = Real(0, 1, transform="normalize", dtype=float) + for i in range(50): + check_limits(a.rvs(random_state=i), 0, 1) + assert_array_equal(a.transformed_bounds, (0, 1)) + + X = rng.rand() + # Check transformed values are in [0, 1] + assert np.all(a.transform(X) <= np.ones_like(X)) + assert np.all(np.zeros_like(X) <= a.transform(X)) + + # Check inverse transform + X_orig = a.inverse_transform(a.transform(X)) + assert isinstance(X_orig, float) + assert_array_equal(X_orig, X) + + a = Real(0, 1, transform="normalize", dtype='float64') + X = np.float64(rng.rand()) + # Check inverse transform + X_orig = a.inverse_transform(a.transform(X)) + assert isinstance(X_orig, np.float64) + + a = Real(0, 1, transform="normalize", dtype=np.float64) + X = np.float64(rng.rand()) + # Check inverse transform + X_orig = a.inverse_transform(a.transform(X)) + assert isinstance(X_orig, np.float64) + + a = Real(0, 1, transform="normalize", dtype='float64') + X = np.float64(rng.rand()) + # Check inverse transform + X_orig = a.inverse_transform(a.transform(X)) + assert isinstance(X_orig, np.float64) + + +@pytest.mark.fast_test +def test_normalize_integer(): a = Integer(2, 30, transform="normalize") for i in range(50): check_limits(a.rvs(random_state=i), 2, 30) @@ -441,39 +504,28 @@ def test_normalize(): assert isinstance(X_orig, int) assert_array_equal(X_orig, X) - a = Real(0, 1, transform="normalize", dtype=float) - for i in range(50): - check_limits(a.rvs(random_state=i), 0, 1) - assert_array_equal(a.transformed_bounds, (0, 1)) - - X = rng.rand() - # Check transformed values are in [0, 1] - assert np.all(a.transform(X) <= np.ones_like(X)) - assert np.all(np.zeros_like(X) <= a.transform(X)) - - # Check inverse transform - X_orig = a.inverse_transform(a.transform(X)) - assert isinstance(X_orig, float) - assert_array_equal(X_orig, X) - - a = Real(0, 1, transform="normalize", dtype='float64') - X = np.float64(rng.rand()) - # Check inverse transform - X_orig = a.inverse_transform(a.transform(X)) - assert isinstance(X_orig, np.float64) - a = Real(0, 1, transform="normalize", dtype=np.float64) - X = np.float64(rng.rand()) - # Check inverse transform - X_orig = a.inverse_transform(a.transform(X)) - assert isinstance(X_orig, np.float64) +@pytest.mark.fast_test +def test_normalize_categorical(): + categories = ["cat", "dog", "rat"] + a = Categorical(categories, transform="normalize") + for i in range(len(categories)): + assert a.rvs(random_state=i)[0] in categories + assert a.inverse_transform(0.) 
== categories[0] + assert a.inverse_transform(0.5) == categories[1] + assert a.inverse_transform(1.0) == categories[2] + assert_array_equal(categories, a.inverse_transform([0., 0.5, 1])) - a = Real(0, 1, transform="normalize", dtype='float64') - X = np.float64(rng.rand()) - # Check inverse transform - X_orig = a.inverse_transform(a.transform(X)) - assert isinstance(X_orig, np.float64) + categories = [1, 2, 3] + a = Categorical(categories, transform="normalize") + assert_array_equal(categories, np.sort(np.unique(a.rvs(100, random_state=1)))) + assert_array_equal(categories, a.inverse_transform([0., 0.5, 1.])) + categories = [1, 2, 3] + a = Categorical(categories, transform="string") + a.set_transformer("normalize") + assert_array_equal(categories, np.sort(np.unique(a.rvs(100, random_state=1)))) + assert_array_equal(categories, a.inverse_transform([0., 0.5, 1.])) @pytest.mark.fast_test def test_normalize_integer(): @@ -655,3 +707,4 @@ def test_purely_categorical_space(): x = optimizer.ask() # before the fix this call raised an exception optimizer.tell(x, 1.) + diff --git a/skopt/tests/test_transformers.py b/skopt/tests/test_transformers.py index 66a210ccc..caa3c96e6 100644 --- a/skopt/tests/test_transformers.py +++ b/skopt/tests/test_transformers.py @@ -6,6 +6,7 @@ from numpy.testing import assert_equal from numpy.testing import assert_raises_regex from skopt.space import LogN +from skopt.space.transformers import StringEncoder, IntegerEncoder, Identity @pytest.mark.fast_test @@ -16,6 +17,7 @@ def test_logn2_integer(): X_orig = transformer.inverse_transform(transformer.transform(X)) assert_array_equal(int(np.round(X_orig)), X) + @pytest.mark.fast_test def test_logn10_integer(): @@ -23,3 +25,66 @@ def test_logn10_integer(): for X in range(2, 31): X_orig = transformer.inverse_transform(transformer.transform(X)) assert_array_equal(int(np.round(X_orig)), X) + + +@pytest.mark.fast_test +def test_integer_encoder(): + + transformer = IntegerEncoder() + X = [1, 5, 9] + transformer.fit(X) + assert_array_equal(transformer.transform(X), [0, 1, 2]) + assert_array_equal(transformer.inverse_transform([0, 1, 2]), X) + + transformer = IntegerEncoder(X) + assert_array_equal(transformer.transform(X), [0, 1, 2]) + assert_array_equal(transformer.inverse_transform([0, 1, 2]), X) + + X = ["a", "b", "c"] + transformer.fit(X) + assert_array_equal(transformer.transform(X), [0, 1, 2]) + assert_array_equal(transformer.inverse_transform([0, 1, 2]), X) + + transformer = IntegerEncoder(X) + assert_array_equal(transformer.transform(X), [0, 1, 2]) + assert_array_equal(transformer.inverse_transform([0, 1, 2]), X) + + +@pytest.mark.fast_test +def test_string_encoder(): + + transformer = StringEncoder() + X = [1, 5, 9] + transformer.fit(X) + assert_array_equal(transformer.transform(X), ['1', '5', '9']) + assert_array_equal(transformer.inverse_transform(['1', '5', '9']), X) + + X = ['a', True, 1] + transformer.fit(X) + assert_array_equal(transformer.transform(X), ['a', 'True', '1']) + assert_array_equal(transformer.inverse_transform(['a', 'True', '1']), X) + + X = ["a", "b", "c"] + transformer.fit(X) + assert_array_equal(transformer.transform(X), X) + assert_array_equal(transformer.inverse_transform(X), X) + + +@pytest.mark.fast_test +def test_identity_encoder(): + + transformer = Identity() + X = [1, 5, 9] + transformer.fit(X) + assert_array_equal(transformer.transform(X), X) + assert_array_equal(transformer.inverse_transform(X), X) + + X = ['a', True, 1] + transformer.fit(X) + assert_array_equal(transformer.transform(X), X) 
+    assert_array_equal(transformer.inverse_transform(X), X)
+
+    X = ["a", "b", "c"]
+    transformer.fit(X)
+    assert_array_equal(transformer.transform(X), X)
+    assert_array_equal(transformer.inverse_transform(X), X)

From 406d40eb80d3b9182b1295b056062083cb9bdb83 Mon Sep 17 00:00:00 2001
From: holgern
Date: Thu, 6 Feb 2020 15:34:10 +0100
Subject: [PATCH 022/265] Fix pep8 and typo

---
 skopt/space/space.py        |  7 ++-----
 skopt/space/transformers.py |  3 ++-
 skopt/tests/test_space.py   | 10 ++++++----
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/skopt/space/space.py b/skopt/space/space.py
index f82e296f9..455e2fc67 100644
--- a/skopt/space/space.py
+++ b/skopt/space/space.py
@@ -169,10 +169,6 @@ def bounds(self):
     def transformed_bounds(self):
         raise NotImplementedError

-    @property
-    def transformed_bounds(self):
-        raise NotImplementedError
-
     @property
     def name(self):
         return self._name
@@ -600,7 +596,8 @@ def set_transformer(self, transform="onehot"):
             self.transformer = StringEncoder()
             self.transformer.fit(self.categories)
         elif transform == "normalize":
             self._rvs = _uniform_inclusive(0.0, 1.0)
             self.transformer = Pipeline(
-                [IntegerEncoder(list(self.categories)), Normalize(0, len(self.categories) - 1)])
+                [IntegerEncoder(list(self.categories)),
+                 Normalize(0, len(self.categories) - 1)])
         else:
             self.transformer = Identity()
             self.transformer.fit(self.categories)
diff --git a/skopt/space/transformers.py b/skopt/space/transformers.py
index 2f24aa6b9..80ff7e535 100644
--- a/skopt/space/transformers.py
+++ b/skopt/space/transformers.py
@@ -170,7 +170,8 @@ def fit(self, X):
         return self

     def transform(self, X):
-        """Transform an array of categories to an integer-encoded representation.
+        """Transform an array of categories to an integer-encoded
+        representation.

         Parameters
diff --git a/skopt/tests/test_space.py b/skopt/tests/test_space.py
index 83021d585..2c0173fbb 100644
--- a/skopt/tests/test_space.py
+++ b/skopt/tests/test_space.py
@@ -373,7 +373,8 @@ def test_set_get_transformer():
                    ("a", "b", "c"), (1.0, 5.0, "log-uniform"), ("e", "f")])

     transformer = space.get_transformer()
-    assert_array_equal(["identity", "identity", "onehot", "identity", "onehot"], transformer)
+    assert_array_equal(["identity", "identity", "onehot",
+                        "identity", "onehot"], transformer)
     space.set_transformer("normalize")
     transformer = space.get_transformer()
     assert_array_equal(["normalize"] * 5, transformer)
@@ -518,13 +519,15 @@ def test_normalize_categorical():
     categories = [1, 2, 3]
     a = Categorical(categories, transform="normalize")
-    assert_array_equal(categories, np.sort(np.unique(a.rvs(100, random_state=1))))
+    assert_array_equal(categories, np.sort(np.unique(a.rvs(100,
+                                           random_state=1))))
     assert_array_equal(categories, a.inverse_transform([0., 0.5, 1.]))

     categories = [1, 2, 3]
     a = Categorical(categories, transform="string")
     a.set_transformer("normalize")
-    assert_array_equal(categories, np.sort(np.unique(a.rvs(100, random_state=1))))
+    assert_array_equal(categories, np.sort(np.unique(a.rvs(100,
+                                           random_state=1))))
     assert_array_equal(categories, a.inverse_transform([0., 0.5, 1.]))

 @pytest.mark.fast_test
 def test_normalize_integer():
@@ -707,4 +710,3 @@ def test_purely_categorical_space():
     x = optimizer.ask()  # before the fix this call raised an exception

     optimizer.tell(x, 1.)
-
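Taken together, the new transformer plumbing can be driven as in this sketch against the API as of this patch (expected values follow from `IntegerEncoder` plus `Normalize`, e.g. 'b' -> 1 -> 1/2):

    import numpy as np
    from skopt.space import Space

    space = Space([(0.0, 1.0), ('a', 'b', 'c')])
    print(space.get_transformer())   # ['identity', 'onehot']
    space.set_transformer('normalize')
    print(space.get_transformer())   # ['normalize', 'normalize']
    print(space.transform([[0.5, 'b']]))                    # array([[0.5, 0.5]])
    print(space.inverse_transform(np.array([[0.5, 0.5]])))  # [[0.5, 'b']]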
From fc63020aad741cf9cb188d09732ea0acb39254bd Mon Sep 17 00:00:00 2001
From: holgern
Date: Thu, 6 Feb 2020 16:08:44 +0100
Subject: [PATCH 023/265] Fix doc string

---
 skopt/space/space.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/skopt/space/space.py b/skopt/space/space.py
index 455e2fc67..90d1b5ebf 100644
--- a/skopt/space/space.py
+++ b/skopt/space/space.py
@@ -580,7 +580,7 @@ def set_transformer(self, transform="onehot"):
         Parameters
         ----------
         transform : str
-           Can be 'normalize' or 'identity'
+           Can be 'normalize', 'onehot', 'string' or 'identity'

         """
         self.transform_ = transform
@@ -594,14 +594,15 @@ def set_transformer(self, transform="onehot"):
             self.transformer = StringEncoder()
             self.transformer.fit(self.categories)
         elif transform == "normalize":
-            self._rvs = _uniform_inclusive(0.0, 1.0)
             self.transformer = Pipeline(
                 [IntegerEncoder(list(self.categories)),
                  Normalize(0, len(self.categories) - 1)])
         else:
             self.transformer = Identity()
             self.transformer.fit(self.categories)
-        if transform != "normalize":
+        if transform == "normalize":
+            self._rvs = _uniform_inclusive(0.0, 1.0)
+        else:
             # XXX check that sum(prior) == 1
             self._rvs = rv_discrete(
                 values=(range(len(self.categories)), self.prior_)
             )

From 33ee2f759d0d6463fa33ff98a490b70c986f7830 Mon Sep 17 00:00:00 2001
From: holgern
Date: Fri, 7 Feb 2020 22:02:23 +0100
Subject: [PATCH 024/265] Add sample methods to improve initial sampling
 calculation

* example added
* New samples package
* Latin hypercube, Sobol, Hammersly and Halton samples were added

---
 doc/modules/classes.rst                     |  24 ++
 doc/modules/samples.rst                     |   6 +
 examples/initial-sampling-method-integer.py | 178 ++++++++++++
 examples/initial-sampling-method.py         | 178 ++++++++++++
 setup.py                                    |   2 +-
 skopt/__init__.py                           |   2 +
 skopt/samples/__init__.py                   |  13 +
 skopt/samples/halton.py                     | 111 ++++++++
 skopt/samples/hammersly.py                  |  60 ++++
 skopt/samples/lhs.py                        | 237 +++++++++++++++
 skopt/samples/sobol.py                      | 301 ++++++++++++++++++++
 skopt/samples/utils.py                      | 193 +++++++++++++
 skopt/tests/test_samples.py                 | 140 +++++++++
 13 files changed, 1444 insertions(+), 1 deletion(-)
 create mode 100644 doc/modules/samples.rst
 create mode 100644 examples/initial-sampling-method-integer.py
 create mode 100644 examples/initial-sampling-method.py
 create mode 100644 skopt/samples/__init__.py
 create mode 100644 skopt/samples/halton.py
 create mode 100644 skopt/samples/hammersly.py
 create mode 100644 skopt/samples/lhs.py
 create mode 100644 skopt/samples/sobol.py
 create mode 100644 skopt/samples/utils.py
 create mode 100644 skopt/tests/test_samples.py

diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst
index a43b295ef..5d546cff0 100644
--- a/doc/modules/classes.rst
+++ b/doc/modules/classes.rst
@@ -219,6 +219,28 @@ details.
    utils.point_aslist
    utils.use_named_args

+.. _samples_ref:
+
+:mod:`skopt.samples`: Samples
+===============================
+
+.. automodule:: skopt.samples
+   :no-members:
+   :no-inherited-members:
+
+**User guide:** See the :ref:`samples` section for further details.
+
+.. currentmodule:: skopt
+
+.. autosummary::
+   :toctree: generated/
+   :template: class.rst
+
+   samples.Lhs
+   samples.Sobol
+   samples.Halton
+   samples.Hammersly
+
 .. _space_ref:
space.transformers.Normalize space.transformers.Pipeline space.transformers.Transformer + space.transformers.IntegerEncoder + space.transformers.StringEncoder diff --git a/doc/modules/samples.rst b/doc/modules/samples.rst new file mode 100644 index 000000000..ab8441cb9 --- /dev/null +++ b/doc/modules/samples.rst @@ -0,0 +1,6 @@ +.. currentmodule:: skopt.samples + +.. _samples: + +Sampling methods +================ diff --git a/examples/initial-sampling-method-integer.py b/examples/initial-sampling-method-integer.py new file mode 100644 index 000000000..6e11edd35 --- /dev/null +++ b/examples/initial-sampling-method-integer.py @@ -0,0 +1,178 @@ +""" +=================================================== +Comparing initial sampling methods on integer space +=================================================== + +Holger Nahrstaedt 2020 Sigurd Carlsen October 2019 + +.. currentmodule:: skopt + +When doing baysian optimization we often want to reserve some of the +early part of the optimization to pure exploration. By default the +optimizer suggests purely random samples for the first n_initial_points +(10 by default). The downside to this is that there is no guarantee that +these samples are spread out evenly across all the dimensions. + +Sampling methods as Latin hypercube, Sobol, Halton and Hammersly +take advantage of the fact that we know beforehand how many random +points we want to sample. Then these points can be "spread out" in +such a way that each dimension is explored. + +See also the example on a real space +:ref:`sphx_glr_auto_examples_initial_sampling_method.py` +""" + +print(__doc__) +import numpy as np +np.random.seed(123) +import matplotlib.pyplot as plt +from skopt.space import Space +from skopt.samples import Sobol +from skopt.samples import Lhs +from skopt.samples import Halton +from skopt.samples import Hammersly +from scipy.spatial.distance import pdist + +############################################################################# + +def plot_branin(x, title): + fig, ax = plt.subplots() + plt.plot(np.array(x)[:, 0], np.array(x)[:, 1], 'bo', label='samples') + plt.plot(np.array(x)[:, 0], np.array(x)[:, 1], 'bs', markersize=40, alpha=0.5) + # ax.legend(loc="best", numpoints=1) + ax.set_xlabel("X1") + ax.set_xlim([0, 5]) + ax.set_ylabel("X2") + ax.set_ylim([0, 5]) + plt.title(title) + ax.grid(True) + +n_dim = 2 +n_samples = 5 + +space = Space([(0, 5), (0, 5)]) +space.set_transformer("normalize") + +############################################################################# +# Random sampling +# --------------- +x = space.rvs(n_samples) +plot_branin(x, "Random samples") +pdist_data = [] +x_label = [] +pdist_data.append(pdist(x).flatten()) +x_label.append("random") +############################################################################# +# Sobol +# ----- + +sobol = Sobol() +inv_initial_samples = sobol.generate(n_dim, n_samples) +x = space.inverse_transform(inv_initial_samples) +plot_branin(x, 'Sobol') +pdist_data.append(pdist(x).flatten()) +x_label.append("sobol") + + +############################################################################# +# Classic latin hypercube sampling +# -------------------------------- + +lhs = Lhs(lhs_type="classic") +inv_initial_samples = lhs.generate(n_dim, n_samples) +x = space.inverse_transform(inv_initial_samples) +plot_branin(x, 'classic LHS') +pdist_data.append(pdist(x).flatten()) +x_label.append("lhs") + +############################################################################# +# Centered latin hypercube sampling +# 
--------------------------------- + +lhs = Lhs(lhs_type="centered") +inv_initial_samples = lhs.generate(n_dim, n_samples) +x = space.inverse_transform(inv_initial_samples) +plot_branin(x, 'centered LHS') +pdist_data.append(pdist(x).flatten()) +x_label.append("center") + +############################################################################# +# Maximin optimized hypercube sampling +# ------------------------------------ + +lhs = Lhs(criterion="maximin", iterations=1000) +inv_initial_samples = lhs.generate(n_dim, n_samples) +x = space.inverse_transform(inv_initial_samples) +plot_branin(x, 'maximin LHS') +pdist_data.append(pdist(x).flatten()) +x_label.append("maximin") + +############################################################################# +# Correlation optimized hypercube sampling +# ---------------------------------------- + +lhs = Lhs(criterion="correlation", iterations=1000) +inv_initial_samples = lhs.generate(n_dim, n_samples) +x = space.inverse_transform(inv_initial_samples) +plot_branin(x, 'correlation LHS') +pdist_data.append(pdist(x).flatten()) +x_label.append("corr") + +############################################################################# +# Ratio optimized hypercube sampling +# ---------------------------------- + +lhs = Lhs(criterion="ratio", iterations=1000) +inv_initial_samples = lhs.generate(n_dim, n_samples) +x = space.inverse_transform(inv_initial_samples) +plot_branin(x, 'ratio LHS') +pdist_data.append(pdist(x).flatten()) +x_label.append("ratio") + +############################################################################# +# ESE optimized hypercube sampling +# -------------------------------- + +lhs = Lhs(criterion="ese", iterations=100) +inv_initial_samples = lhs.generate(n_dim, n_samples) +x = space.inverse_transform(inv_initial_samples) +plot_branin(x, 'ese LHS') +pdist_data.append(pdist(x).flatten()) +x_label.append("ese") + +############################################################################# +# Halton sampling +# --------------- + +halton = Halton() +inv_initial_samples = halton.generate(n_dim, n_samples) +x = space.inverse_transform(inv_initial_samples) +plot_branin(x, 'Halton') +pdist_data.append(pdist(x).flatten()) +x_label.append("halton") + +############################################################################# +# Hammersly sampling +# ------------------ + +hammersly = Hammersly() +inv_initial_samples = hammersly.generate(n_dim, n_samples) +x = space.inverse_transform(inv_initial_samples) +plot_branin(x, 'Hammersly') +pdist_data.append(pdist(x).flatten()) +x_label.append("hammersly") + +############################################################################# +# Pdist boxplot of all methods +# ---------------------------- +# +# This boxplot shows the distance between all generated points using +# Euclidian distance. The higher the value, the better the sampling method. +# It can be seen that random has the worst performance + +fig, ax = plt.subplots() +ax.boxplot(pdist_data) +plt.grid(True) +plt.ylabel("pdist") +_ = ax.set_ylim(0, 6) +_ = ax.set_xticklabels(x_label, rotation=45, fontsize=8) \ No newline at end of file diff --git a/examples/initial-sampling-method.py b/examples/initial-sampling-method.py new file mode 100644 index 000000000..30cedb7df --- /dev/null +++ b/examples/initial-sampling-method.py @@ -0,0 +1,178 @@ +""" +================================== +Comparing initial sampling methods +================================== + +Holger Nahrstaedt 2020 Sigurd Carlsen October 2019 + +.. 
currentmodule:: skopt + + +When doing baysian optimization we often want to reserve some of the +early part of the optimization to pure exploration. By default the +optimizer suggests purely random samples for the first n_initial_points +(10 by default). The downside to this is that there is no guarantee that +these samples are spread out evenly across all the dimensions. + +Sampling methods as Latin hypercube, Sobol, Halton and Hammersly +take advantage of the fact that we know beforehand how many random +points we want to sample. Then these points can be "spread out" in +such a way that each dimension is explored. + +See also the example on an integer space +:ref:`sphx_glr_auto_examples_initial_sampling_method_integer.py` +""" + +print(__doc__) +import numpy as np +np.random.seed(123) +import matplotlib.pyplot as plt +from skopt.space import Space +from skopt.samples import Sobol +from skopt.samples import Lhs +from skopt.samples import Halton +from skopt.samples import Hammersly +from scipy.spatial.distance import pdist + +############################################################################# + +def plot_branin(x, title): + fig, ax = plt.subplots() + plt.plot(np.array(x)[:, 0], np.array(x)[:, 1], 'bo', label='samples') + plt.plot(np.array(x)[:, 0], np.array(x)[:, 1], 'bo', markersize=40, alpha=0.5) + # ax.legend(loc="best", numpoints=1) + ax.set_xlabel("X1") + ax.set_xlim([-5, 10]) + ax.set_ylabel("X2") + ax.set_ylim([0, 15]) + plt.title(title) + +n_dim = 2 +n_samples = 40 + +space = Space([(-5., 10.), (0., 15.)]) +space.set_transformer("normalize") + +############################################################################# +# Random sampling +# --------------- +x = space.rvs(n_samples) +plot_branin(x, "Random samples") +pdist_data = [] +x_label = [] +pdist_data.append(pdist(x).flatten()) +x_label.append("random") +############################################################################# +# Sobol +# ----- + +sobol = Sobol() +inv_initial_samples = sobol.generate(n_dim, n_samples) +x = space.inverse_transform(inv_initial_samples) +plot_branin(x, 'Sobol') +pdist_data.append(pdist(x).flatten()) +x_label.append("sobol") + + +############################################################################# +# Classic Latin hypercube sampling +# -------------------------------- + +lhs = Lhs(lhs_type="classic") +inv_initial_samples = lhs.generate(n_dim, n_samples) +x = space.inverse_transform(inv_initial_samples) +plot_branin(x, 'classic LHS') +pdist_data.append(pdist(x).flatten()) +x_label.append("lhs") + +############################################################################# +# Centered Latin hypercube sampling +# --------------------------------- + +lhs = Lhs(lhs_type="centered") +inv_initial_samples = lhs.generate(n_dim, n_samples) +x = space.inverse_transform(inv_initial_samples) +plot_branin(x, 'centered LHS') +pdist_data.append(pdist(x).flatten()) +x_label.append("center") + +############################################################################# +# Maximin optimized hypercube sampling +# ------------------------------------ + +lhs = Lhs(criterion="maximin", iterations=1000) +inv_initial_samples = lhs.generate(n_dim, n_samples) +x = space.inverse_transform(inv_initial_samples) +plot_branin(x, 'maximin LHS') +pdist_data.append(pdist(x).flatten()) +x_label.append("maximin") + +############################################################################# +# Correlation optimized hypercube sampling +# ---------------------------------------- + +lhs = 
Lhs(criterion="correlation", iterations=1000) +inv_initial_samples = lhs.generate(n_dim, n_samples) +x = space.inverse_transform(inv_initial_samples) +plot_branin(x, 'correlation LHS') +pdist_data.append(pdist(x).flatten()) +x_label.append("corr") + +############################################################################# +# Ratio optimized hypercube sampling +# ---------------------------------- + +lhs = Lhs(criterion="ratio", iterations=1000) +inv_initial_samples = lhs.generate(n_dim, n_samples) +x = space.inverse_transform(inv_initial_samples) +plot_branin(x, 'ratio LHS') +pdist_data.append(pdist(x).flatten()) +x_label.append("ratio") + +############################################################################# +# ESE optimized hypercube sampling +# -------------------------------- + +lhs = Lhs(criterion="ese", iterations=10) +inv_initial_samples = lhs.generate(n_dim, n_samples) +x = space.inverse_transform(inv_initial_samples) +plot_branin(x, 'ese LHS') +pdist_data.append(pdist(x).flatten()) +x_label.append("ese") + +############################################################################# +# Halton sampling +# --------------- + +halton = Halton() +inv_initial_samples = halton.generate(n_dim, n_samples) +x = space.inverse_transform(inv_initial_samples) +plot_branin(x, 'Halton') +pdist_data.append(pdist(x).flatten()) +x_label.append("halton") + +############################################################################# +# Hammersly sampling +# ------------------ + +hammersly = Hammersly() +inv_initial_samples = hammersly.generate(n_dim, n_samples) +x = space.inverse_transform(inv_initial_samples) +plot_branin(x, 'Hammersly') +pdist_data.append(pdist(x).flatten()) +x_label.append("hammersly") + +############################################################################# +# Pdist boxplot of all methods +# ---------------------------- +# +# This boxplot shows the distance between all generated points using +# Euclidian distance. The higher the value, the better the sampling method. +# It can be seen that random has the worst performance + +fig, ax = plt.subplots() +ax.boxplot(pdist_data) +plt.grid(True) +plt.ylabel("pdist") +_ = ax.set_ylim(0, 12) +_ = ax.set_xticklabels(x_label, rotation=45, fontsize=8) \ No newline at end of file diff --git a/setup.py b/setup.py index b66a93600..e07997f11 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ license='BSD 3-clause "New" or "Revised License"', author='The scikit-optimize contributors', packages=['skopt', 'skopt.learning', 'skopt.optimizer', 'skopt.space', - 'skopt.learning.gaussian_process'], + 'skopt.learning.gaussian_process', 'skopt.samples'], install_requires=['joblib', 'pyaml', 'numpy', 'scipy>=0.14.0', 'scikit-learn>=0.19.1'], extras_require={ diff --git a/skopt/__init__.py b/skopt/__init__.py index c0ae1e311..727ad1190 100644 --- a/skopt/__init__.py +++ b/skopt/__init__.py @@ -44,6 +44,7 @@ from . import optimizer from . import space + from . 
import samples
     from .optimizer import dummy_minimize
     from .optimizer import forest_minimize
     from .optimizer import gbrt_minimize
@@ -61,6 +62,7 @@
     "learning",
     "optimizer",
     "plots",
+    "samples",
     "space",
     "gp_minimize",
     "dummy_minimize",
diff --git a/skopt/samples/__init__.py b/skopt/samples/__init__.py
new file mode 100644
index 000000000..ef27a9df3
--- /dev/null
+++ b/skopt/samples/__init__.py
@@ -0,0 +1,13 @@
+"""
+Utilities to define samples
+"""
+from .lhs import Lhs
+from .sobol import Sobol
+from .halton import Halton
+from .hammersly import Hammersly
+
+
+__all__ = [
+    "Lhs", "Sobol",
+    "Halton", "Hammersly"
+]
diff --git a/skopt/samples/halton.py b/skopt/samples/halton.py
new file mode 100644
index 000000000..61a5cebf8
--- /dev/null
+++ b/skopt/samples/halton.py
@@ -0,0 +1,111 @@
+"""
+Inspired by https://github.com/jonathf/chaospy/blob/master/chaospy/
+distributions/sampler/sequences/halton.py
+"""
+import numpy as np
+from .utils import create_primes
+from .utils import InitialPointGenerator
+
+
+class Halton(InitialPointGenerator):
+    """In statistics, Halton sequences are sequences used to generate
+    points in space for numerical methods such as Monte Carlo simulations.
+    Although these sequences are deterministic, they are of low discrepancy,
+    that is, appear to be random for many purposes. They were first
+    introduced in 1960 and are an example of a quasi-random number sequence.
+    They generalise the one-dimensional van der Corput sequences.
+
+    For ``dim == 1`` the sequence falls back to Van Der Corput sequence.
+
+    Parameters
+    ----------
+    skip : int
+        Skip the first ``skip`` samples. If negative, the maximum of
+        ``primes`` is used.
+    primes : tuple
+        The (non-)prime base to calculate values along each axis. If
+        empty, growing prime values starting from 2 will be used.
+    """
+    def __init__(self, skip=-1, primes=()):
+        self.skip = skip
+        self.primes = primes
+
+    def generate(self, n_dim, n_samples, random_state=None):
+        """Creates samples from Halton set.
+
+        Parameters
+        ----------
+        n_dim : int
+            The number of dimension
+        n_samples : int
+            The order of the Halton sequence. Defines the number of samples.
+        random_state : int, RandomState instance, or None (default)
+            Set random state to something other than None for reproducible
+            results.
+
+        Returns
+        -------
+        np.array, shape=(n_samples, n_dim)
+            Halton set
+        """
+        primes = list(self.primes)
+        if not primes:
+            prime_order = 10 * n_dim
+            while len(primes) < n_dim:
+                primes = create_primes(prime_order)
+                prime_order *= 2
+            primes = primes[:n_dim]
+            assert len(primes) == n_dim, "not enough primes"
+
+        if self.skip < 0:
+            skip = max(primes)
+        else:
+            skip = self.skip
+
+        out = np.empty((n_dim, n_samples))
+        indices = [idx + skip for idx in range(n_samples)]
+        for dim_ in range(n_dim):
+            out[dim_] = _van_der_corput_samples(
+                indices, number_base=primes[dim_])
+        return np.transpose(out)
+
+
+def _van_der_corput_samples(idx, number_base=2):
+    """
+    Van der Corput samples.
+    Create `Van Der Corput` low discrepancy sequence samples.
+
+    A van der Corput sequence is an example of the simplest one-dimensional
+    low-discrepancy sequence over the unit interval; it was first described
+    in 1935 by the Dutch mathematician J. G. van der Corput. It is
+    constructed by reversing the base-n representation of the sequence of
+    natural numbers (1, 2, 3, ...).
+
+    In practice, use Halton sequence instead of Van Der Corput, as it is the
+    same, but generalized to work in multiple dimensions.
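+
+    For example, in base 2 the sequence starts 1/2, 1/4, 3/4, 1/8, 5/8, ...
+    (the base-2 digits of 1, 2, 3, ... mirrored about the radix point).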
+
+    Parameters
+    ----------
+    idx : int or numpy.ndarray
+        The index of the sequence. If an array is provided, all values in
+        the array are returned.
+    number_base : int
+        The numerical base from where to create the samples from.
+
+    Returns
+    -------
+    float, numpy.ndarray
+        Van der Corput samples.
+    """
+    assert number_base > 1
+
+    idx = np.asarray(idx).flatten() + 1
+    out = np.zeros(len(idx), dtype=float)
+
+    base = float(number_base)
+    active = np.ones(len(idx), dtype=bool)
+    while np.any(active):
+        out[active] += (idx[active] % number_base)/base
+        idx //= number_base
+        base *= number_base
+        active = idx > 0
+    return out
diff --git a/skopt/samples/hammersly.py b/skopt/samples/hammersly.py
new file mode 100644
index 000000000..2d131464d
--- /dev/null
+++ b/skopt/samples/hammersly.py
@@ -0,0 +1,60 @@
+# -*- coding: utf-8 -*-
+"""Inspired by https://github.com/jonathf/chaospy/blob/master/chaospy/
+distributions/sampler/sequences/hammersley.py
+"""
+import numpy as np
+from .halton import Halton
+from .utils import InitialPointGenerator
+
+
+class Hammersly(InitialPointGenerator):
+    """The Hammersley set is equivalent to the Halton sequence, except that
+    one dimension is replaced with a regular grid. It is not recommended
+    to generate a Hammersley sequence of more than 10 dimensions.
+
+    For ``dim == 1`` the sequence falls back to Van Der Corput sequence.
+
+    T-T. Wong, W-S. Luk, and P-A. Heng, "Sampling with Hammersley and Halton
+    Points," Journal of Graphics Tools, vol. 2, no. 2, 1997, pp. 9 - 24.
+
+    Parameters
+    ----------
+    skip : int
+        Skip the first ``skip`` samples. If negative, the maximum of
+        ``primes`` is used.
+    primes : tuple
+        The (non-)prime base to calculate values along each axis. If
+        empty, growing prime values starting from 2 will be used.
+    """
+    def __init__(self, skip=-1, primes=()):
+        self.skip = skip
+        self.primes = primes
+
+    def generate(self, n_dim, n_samples, random_state=None):
+        """Creates samples from Hammersly set.
+
+        Parameters
+        ----------
+        n_dim : int
+            The number of dimension
+        n_samples : int
+            The order of the Hammersley sequence.
+            Defines the number of samples.
+        random_state : int, RandomState instance, or None (default)
+            Set random state to something other than None for reproducible
+            results.
+
+        Returns
+        -------
+        np.array, shape=(n_samples, n_dim)
+            Hammersley set
+        """
+        halton = Halton(skip=self.skip, primes=self.primes)
+
+        if n_dim == 1:
+            return halton.generate(n_dim, n_samples,
+                                   random_state=random_state)
+        out = np.empty((n_dim, n_samples), dtype=float)
+        out[:n_dim - 1] = halton.generate(n_dim - 1, n_samples,
+                                          random_state=random_state).T
+
+        out[n_dim - 1] = np.linspace(0, 1, n_samples + 2)[1:-1]
+        return out.T
diff --git a/skopt/samples/lhs.py b/skopt/samples/lhs.py
new file mode 100644
index 000000000..2103cf495
--- /dev/null
+++ b/skopt/samples/lhs.py
@@ -0,0 +1,237 @@
+"""
+Lhs functions are inspired by
+https://github.com/clicumu/pyDOE2/blob/
+master/pyDOE2/doe_lhs.py
+
+The lhs "ese" criterion is inspired by
+https://github.com/damar-wicaksono/gsa-module/blob/develop/
+gsa_module/samples/lhs_opt.py (Damar Wicaksono)
+"""
+import numpy as np
+from sklearn.utils import check_random_state
+from scipy import spatial
+from .utils import random_permute_matrix
+from .utils import InitialPointGenerator
+from .utils import w2_discrepancy_fast, calc_max_inner, calc_num_candidate
+
+
+class Lhs(InitialPointGenerator):
+    """Latin hypercube sampling
+
+    Parameters
+    ----------
+    lhs_type : str
+        - `classic` - a small random number is added
+        - `centered` - points are set uniformly in each interval
+
+    criterion : str or None, default=None
+        When set to None, the LHS is not optimized
+
+        - `correlation` : optimized LHS by minimizing the correlation
+        - `maximin` : optimized LHS by maximizing the minimal pdist
+        - `ratio` : optimized LHS by minimizing the ratio
+          `max(pdist) / min(pdist)`
+        - `ese` : optimized LHS using an enhanced stochastic
+          evolutionary (ESE) algorithm
+
+    iterations : int
+        Defines the number of iterations for optimizing the LHS
+    """
+    def __init__(self, lhs_type="classic", criterion=None, iterations=1000):
+        self.lhs_type = lhs_type
+        self.criterion = criterion
+        self.iterations = iterations
+
+        # ese optimization parameters
+        # the initial threshold
+        self.ese_threshold_init = 0
+        # the number of candidates in the perturbation step
+        self.ese_num_exchanges = 0
+        # the maximum number of inner iterations
+        self.ese_max_inner = 0
+        # the 2 parameters used in the improve process:
+        # (a) the cut-off value to decrease the threshold
+        # (b) the multiplier to decrease or increase the threshold
+        self.ese_improving_params = [0.1, 0.8]
+        # the 4 parameters used in the explore process:
+        # (a) the cut-off value of acceptance below which the threshold
+        #     starts increasing
+        # (b) the cut-off value of acceptance above which the threshold
+        #     starts decreasing
+        # (c) the cooling multiplier for the threshold
+        # (d) the warming multiplier for the threshold
+        self.ese_exploring_params = [0.1, 0.8, 0.9, 0.7]
+
+    def generate(self, n_dim, n_samples, random_state=None):
+        """Creates latin hypercube samples.
+
+        Parameters
+        ----------
+        n_dim : int
+            The number of dimensions
+        n_samples : int
+            The order of the LHS sequence. Defines the number of samples.
+        random_state : int, RandomState instance, or None (default)
+            Set random state to something other than None for reproducible
+            results.
+
+        Returns
+        -------
+        np.array, shape=(n_samples, n_dim)
+            LHS set
+        """
+        rng = check_random_state(random_state)
+        if self.criterion is None:
+            x = np.linspace(0, 1, n_samples + 1)
+            u = rng.rand(n_samples, n_dim)
+            h = np.zeros_like(u)
+            if self.lhs_type == "centered":
+                for j in range(n_dim):
+                    h[:, j] = np.diff(x) / 2.0 + x[:n_samples]
+            elif self.lhs_type == "classic":
+                for j in range(n_dim):
+                    h[:, j] = u[:, j] * np.diff(x) + x[:n_samples]
+            return random_permute_matrix(h, random_state=random_state)
+        else:
+            internal_lhs = Lhs(lhs_type=self.lhs_type, criterion=None)
+            # Pass the advancing `rng` on to the internal LHS so that every
+            # candidate design in the loops below is different, even when an
+            # integer seed was given.
+            h_opt = internal_lhs.generate(n_dim, n_samples,
+                                          random_state=rng)
+            if self.criterion == "correlation":
+                # Minimize the largest absolute off-diagonal correlation
+                mincorr = np.inf
+                for i in range(self.iterations):
+                    # Generate a random LHS
+                    h = internal_lhs.generate(n_dim, n_samples,
+                                              random_state=rng)
+                    r = np.corrcoef(h.T)
+                    max_corr = np.max(np.abs(r - np.eye(r.shape[0])))
+                    if max_corr < mincorr:
+                        mincorr = max_corr
+                        h_opt = h.copy()
+
+            elif self.criterion == "maximin":
+                maxdist = 0
+                # Maximize the minimum distance between points
+                for i in range(self.iterations):
+                    h = internal_lhs.generate(n_dim, n_samples,
+                                              random_state=rng)
+                    d = spatial.distance.pdist(h, 'euclidean')
+                    if maxdist < np.min(d):
+                        maxdist = np.min(d)
+                        h_opt = h.copy()
+
+            elif self.criterion == "ratio":
+                minratio = np.inf
+
+                # Minimize the ratio between the largest and the smallest
+                # distance between points
+                for i in range(self.iterations):
+                    h = internal_lhs.generate(n_dim, n_samples,
+                                              random_state=rng)
+                    p = spatial.distance.pdist(h, 'euclidean')
+                    ratio = np.max(p) / np.min(p)
+                    if minratio > ratio:
+                        minratio = ratio
+                        h_opt = h.copy()
+            elif self.criterion == "ese":
+
+                dm_init = internal_lhs.generate(n_dim, n_samples,
+                                                random_state=rng)
+
+                if self.ese_threshold_init <= 0.0:
+                    threshold = 0.005 * w2_discrepancy_fast(dm_init)
+                else:
+                    threshold = self.ese_threshold_init
+                if self.ese_num_exchanges <= 0:  # number of exchanges
+                    num_exchanges = calc_num_candidate(n_samples)
+                else:
+                    num_exchanges = self.ese_num_exchanges
+                if self.ese_max_inner <= 0:  # max. number of inner iterations
+                    max_inner = calc_max_inner(n_samples, n_dim)
+                else:
+                    max_inner = self.ese_max_inner
+
+                dm = dm_init.copy()  # the current design
+                # the best value of the objective function so far
+                obj_func_best = w2_discrepancy_fast(dm)
+                # the previous best value, used for convergence checking
+                obj_func_best_old = w2_discrepancy_fast(dm)
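+
+                # ESE outline: each outer iteration perturbs the design
+                # column-wise, accepts trial designs through a
+                # threshold-based criterion, and then adapts the threshold
+                # depending on whether the best discrepancy improved
+                # (improve phase) or not (explore phase).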
+                flag_explore = False  # explore flag
+
+                best_evol = []  # Keep track of the best solution
+                try_evol = []  # Keep track of the accepted trial solutions
+
+                # The set of candidate row-pairs to swap is the same in
+                # every iteration, so build it once, outside the loops.
+                import itertools
+                pairs = list(itertools.combinations(range(n_samples), 2))
+
+                # Begin Outer Iteration
+                for outer in range(self.iterations):
+                    # Initialization of Inner Iteration
+                    n_accepted = 0  # number of accepted trials
+                    n_improved = 0  # number of improved trials
+
+                    # Begin Inner Iteration
+                    for inner in range(max_inner):
+                        obj_func = w2_discrepancy_fast(dm)
+                        # Perturb the current design column-wise
+                        num_dimension = inner % n_dim
+                        # Pick random pairs for the perturbation,
+                        # without replacement
+                        rand_choices = rng.choice(len(pairs), num_exchanges,
+                                                  replace=False)
+                        # Initialize the search
+                        obj_func_current = np.inf
+                        dm_current = dm.copy()
+                        for i in rand_choices:
+                            # Always perturb from the current design
+                            dm_try = dm.copy()
+                            # Swap two entries in column 'num_dimension'
+                            dm_try[pairs[i][0], num_dimension] = \
+                                dm[pairs[i][1], num_dimension]
+                            dm_try[pairs[i][1], num_dimension] = \
+                                dm[pairs[i][0], num_dimension]
+                            obj_func_try = w2_discrepancy_fast(dm_try)
+                            if obj_func_try < obj_func_current:
+                                # Select the best trial from all the
+                                # perturbation trials
+                                obj_func_current = obj_func_try
+                                dm_current = dm_try.copy()
+
+                        obj_func_try = w2_discrepancy_fast(dm_current)
+                        # Check whether the solution is acceptable
+                        if (obj_func_try - obj_func) <= threshold * rng.rand():
+                            # Accept solution
+                            dm = dm_current.copy()
+                            n_accepted += 1
+                            try_evol.append(obj_func_try)
+                            if obj_func_try < obj_func_best:
+                                # Best solution found
+                                h_opt = dm.copy()
+                                obj_func_best = obj_func_try
+                                best_evol.append(obj_func_best)
+                                n_improved += 1
+
+                    # Accept/Reject as Best Solution for convergence checking
+                    if ((obj_func_best_old - obj_func_best) /
+                            obj_func_best) > 1e-6:
+                        # Improvement found
+                        obj_func_best_old = obj_func_best
+                        # Reset the explore flag after a new best is found
+                        flag_explore = False
+                        flag_imp = True
+                    else:
+                        # Improvement not found
+                        flag_imp = False
+
+                    # Improve vs. Explore Phase and Threshold Update
+                    if flag_imp:  # Improve
+                        # New best solution found, carry out the improvement
+                        # process
+                        if (float(n_accepted) / num_exchanges >
+                                self.ese_improving_params[0]) and \
+                                (n_accepted > n_improved):
+                            # Many acceptances, but not all of them are
+                            # improvements: reduce the threshold to make it
+                            # harder to accept a trial
+                            threshold *= self.ese_improving_params[1]
+                        else:
+                            # Few acceptances, or all trials are
+                            # improvements: increase the threshold to make
+                            # it easier to accept a trial
+                            threshold /= self.ese_improving_params[1]
+
+                    else:  # Explore: no new best solution found
+                        # Exploring process, warming up vs. cooling down
+                        if (n_accepted <
+                                self.ese_exploring_params[0] * num_exchanges):
+                            # Below the lower limit: start increasing the
+                            # threshold ("warming up")
+                            flag_explore = True
+                        elif (n_accepted >
+                                self.ese_exploring_params[1] * num_exchanges):
+                            # Above the upper limit: start decreasing the
+                            # threshold ("cooling down")
+                            flag_explore = False
+
+                        if flag_explore:
+                            # Warming up: increase the threshold so more
+                            # trials are accepted
+                            threshold /= self.ese_exploring_params[3]
+                        else:
+                            # Cooling down: decrease the threshold so fewer
+                            # trials are accepted
+                            threshold *= self.ese_exploring_params[2]
+
+            return h_opt
diff --git a/skopt/samples/sobol.py b/skopt/samples/sobol.py
new file mode 100644
index 000000000..5a57e0e0c
--- /dev/null
+++ b/skopt/samples/sobol.py
@@ -0,0 +1,301 @@
+"""
+  Authors:
+    Original FORTRAN77 version of i4_sobol by Bennett Fox.
+    MATLAB version by John Burkardt.
+    PYTHON version by Corrado Chisari
+
+    Original Python version of is_prime by Corrado Chisari
+
+    Original MATLAB versions of other functions by John Burkardt.
+    PYTHON versions by Corrado Chisari
+
+    Modified Python version by Holger Nahrstaedt
+
+    Original code is available from
+    http://people.sc.fsu.edu/~jburkardt/py_src/sobol/sobol.html
+"""
+
+from __future__ import division
+import numpy as np
+from scipy.stats import norm
+from .utils import InitialPointGenerator
+from .utils import _bit_lo0, _bit_hi1, random_shift
+from sklearn.utils import check_random_state
+
+
+class Sobol(InitialPointGenerator):
+    """Generates a new quasirandom Sobol vector with each call.
+
+    The routine adapts the ideas of Antonov and Saleev.
+
+    Parameters
+    ----------
+    min_skip : int
+        Minimum skipped seed number. When `min_skip != max_skip`
+        a random number is picked.
+    max_skip : int
+        Maximum skipped seed number. When `min_skip != max_skip`
+        a random number is picked.
+
+    randomize : bool, default=False
+        When set to True, a random shift is applied.
+
+    References
+    ----------
+    Antonov, Saleev,
+    USSR Computational Mathematics and Mathematical Physics,
+    Volume 19, 1980, pages 252 - 256.
+
+    Paul Bratley, Bennett Fox,
+    Algorithm 659:
+    Implementing Sobol's Quasirandom Sequence Generator,
+    ACM Transactions on Mathematical Software,
+    Volume 14, Number 1, pages 88-100, 1988.
+
+    Bennett Fox,
+    Algorithm 647:
+    Implementation and Relative Efficiency of Quasirandom
+    Sequence Generators,
+    ACM Transactions on Mathematical Software,
+    Volume 12, Number 4, pages 362-376, 1986.
+
+    Ilya Sobol,
+    USSR Computational Mathematics and Mathematical Physics,
+    Volume 16, pages 236-242, 1977.
+
+    Ilya Sobol, Levitan,
+    The Production of Points Uniformly Distributed in a Multidimensional
+    Cube (in Russian),
+    Preprint IPM Akad. Nauk SSSR,
+    Number 40, Moscow 1976.
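+
+    A minimal usage sketch (illustrative; the values match the
+    accompanying unit tests):
+
+        >>> sobol = Sobol(min_skip=1, max_skip=1)
+        >>> sobol.generate(n_dim=3, n_samples=2)
+        array([[0.5 , 0.5 , 0.5 ],
+               [0.75, 0.25, 0.75]])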
+ """ + def __init__(self, min_skip=0, max_skip=1000, randomize=False): + + self.min_skip = min_skip + self.max_skip = max_skip + self.randomize = randomize + self.dim_max = 40 + self.log_max = 30 + self.atmost = 2 ** self.log_max - 1 + self.lastq = None + self.maxcol = None + self.poly = None + self.recipd = None + self.seed_save = -1 + self.v = np.zeros((self.dim_max, self.log_max)) + self.dim_num_save = -1 + self.initialized = 1 + + def init(self, dim_num): + self.dim_num_save = dim_num + self.v = np.zeros((self.dim_max, self.log_max)) + self.v[0:40, 0] = np.transpose([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1]) + + self.v[2:40, 1] = np.transpose([1, 3, 1, 3, 1, 3, 3, 1, 3, 1, 3, 1, + 3, 1, 1, 3, 1, 3, 1, 3, 1, 3, 3, 1, + 3, 1, 3, 1, 3, 1, 1, 3, 1, 3, 1, 3, + 1, 3]) + + self.v[3:40, 2] = np.transpose([7, 5, 1, 3, 3, 7, 5, 5, 7, 7, 1, + 3, 3, 7, 5, 1, 1, 5, 3, 3, 1, 7, 5, + 1, 3, 3, 7, 5, 1, 1, 5, 7, 7, 5, 1, + 3, 3]) + + self.v[5:40, 3] = np.transpose([1, 7, 9, 13, 11, 1, 3, 7, 9, + 5, 13, 13, 11, 3, 15, 5, 3, 15, + 7, 9, 13, 9, 1, 11, 7, 5, 15, + 1, 15, 11, 5, 3, 1, 7, 9]) + + self.v[7:40, 4] = np.transpose([9, 3, 27, 15, 29, 21, 23, 19, + 11, 25, 7, 13, 17, 1, 25, 29, + 3, 31, 11, 5, 23, 27, 19, 21, + 5, 1, 17, 13, 7, 15, 9, 31, 9]) + + self.v[13:40, 5] = np.transpose([37, 33, 7, 5, 11, 39, 63, 27, + 17, 15, 23, 29, 3, 21, 13, 31, + 25, 9, 49, 33, 19, 29, 11, 19, + 27, 15, 25]) + + self.v[19:40, 6] = np.transpose([13, 33, 115, 41, 79, 17, 29, 119, + 75, 73, 105, 7, 59, 65, 21, 3, + 113, 61, 89, 45, 107]) + + self.v[37:40, 7] = np.transpose([7, 23, 39]) + + # Set POLY. + self.poly = [1, 3, 7, 11, 13, 19, 25, 37, 59, 47, 61, 55, 41, 67, 97, + 91, 109, 103, 115, 131, 193, 137, 145, 143, 241, 157, + 185, 167, 229, 171, 213, 191, 253, 203, 211, 239, 247, + 285, 369, 299] + + # Find the number of bits in ATMOST. + self.maxcol = _bit_hi1(self.atmost) + + # Initialize row 1 of V. + self.v[0, 0:self.maxcol] = 1 + + # Check parameters. + if dim_num < 1 or self.dim_max < dim_num: + print('I4_SOBOL - Fatal error!') + print(' The spatial dimension DIM_NUM should satisfy:') + print(' 1 <= DIM_NUM <= %d' % self.dim_max) + print(' But this input value is DIM_NUM = %d' % dim_num) + return + + # Initialize the remaining rows of V. + for i in range(2, dim_num + 1): + + # The bits of the integer POLY(I) gives the form of polynomial I. + # Find the degree of polynomial I from binary encoding. + j = self.poly[i - 1] + m = 0 + j //= 2 + while j > 0: + j //= 2 + m += 1 + + # Expand this bit pattern to separate components of the logical array INCLUD. + j = self.poly[i - 1] + includ = np.zeros(m) + for k in range(m, 0, -1): + j2 = j // 2 + includ[k - 1] = (j != 2 * j2) + j = j2 + + # Calculate the remaining elements of row I as explained + # in Bratley and Fox, section 2. + for j in range(m + 1, self.maxcol + 1): + newv = self.v[i - 1, j - m - 1] + l = 1 + for k in range(1, m + 1): + l *= 2 + if includ[k - 1]: + newv = np.bitwise_xor( + int(newv), int(l * self.v[i - 1, j - k - 1])) + self.v[i - 1, j - 1] = newv + # Multiply columns of V by appropriate power of 2. + l = 1 + for j in range(self.maxcol - 1, 0, -1): + l *= 2 + self.v[0:dim_num, j - 1] = self.v[0:dim_num, j - 1] * l + + # RECIPD is 1/(common denominator of the elements in V). + self.recipd = 1.0 / (2 * l) + self.lastq = np.zeros(dim_num) + + def generate(self, n_dim, n_samples, random_state=None): + """Creates samples from Sobol set. 
+ + Parameters + ---------- + n_dim : int + The number of dimension + n_samples : int + The order of the Sobol sequence. Defines the number of samples. + random_state : int, RandomState instance, or None (default) + Set random state to something other than None for reproducible + results. + + Returns + ------- + np.array, shape=(n_dim, n_samples) + Sobol set + """ + rng = check_random_state(random_state) + r = np.full((n_samples, n_dim), np.nan) + if self.min_skip == self.max_skip: + seed = self.min_skip + else: + seed = rng.randint(self.min_skip, self.max_skip) + for j in range(n_samples): + r[j, 0:n_dim], seed = self._sobol(n_dim, seed) + if self.randomize: + return random_shift(r, random_state) + return r + + def _sobol(self, dim_num, seed): + """Generates a new quasirandom Sobol vector with each call. + + Parameters + ---------- + dim_num : int + number of spatial dimensions. + `dim_num` must satisfy 1 <= DIM_NUM <= 40. + + seed : int + the "seed" for the sequence. + This is essentially the index in the sequence of the quasirandom + value to be generated. On output, SEED has been set to the + appropriate next value, usually simply SEED+1. + If SEED is less than 0 on input, it is treated as though it were 0. + An input value of 0 requests the first (0-th) element of the sequence. + + Returns + ------- + the next quasirandom vector. + """ + + # Things to do only if the dimension changed. + if dim_num != self.dim_num_save: + self.init(dim_num) + + seed = int(np.floor(seed)) + + if seed < 0: + seed = 0 + + l = 1 + if seed == 0: + self.lastq = np.zeros(dim_num) + + elif seed == self.seed_save + 1: + + # Find the position of the right-hand zero in SEED. + l = _bit_lo0(seed) + + elif seed <= self.seed_save: + + self.seed_save = 0 + self.lastq = np.zeros(dim_num) + + for seed_temp in range(int(self.seed_save), int(seed)): + l = _bit_lo0(seed_temp) + for i in range(1, dim_num + 1): + self.lastq[i - 1] = np.bitwise_xor( + int(self.lastq[i - 1]), int(self.v[i - 1, l - 1])) + + l = _bit_lo0(seed) + + elif self.seed_save + 1 < seed: + + for seed_temp in range(int(self.seed_save + 1), int(seed)): + l = _bit_lo0(seed_temp) + for i in range(1, dim_num + 1): + self.lastq[i - 1] = np.bitwise_xor( + int(self.lastq[i - 1]), int(self.v[i - 1, l - 1])) + + l = _bit_lo0(seed) + + # Check that the user is not calling too many times! + if self.maxcol < l: + print('I4_SOBOL - Fatal error!') + print(' Too many calls!') + print(' MAXCOL = %d\n' % self.maxcol) + print(' L = %d\n' % l) + return + + # Calculate the new components of QUASI. + quasi = np.zeros(dim_num) + for i in range(1, dim_num + 1): + quasi[i - 1] = self.lastq[i - 1] * self.recipd + self.lastq[i - 1] = np.bitwise_xor( + int(self.lastq[i - 1]), int(self.v[i - 1, l - 1])) + + self.seed_save = seed + seed += 1 + + return [quasi, seed] + + diff --git a/skopt/samples/utils.py b/skopt/samples/utils.py new file mode 100644 index 000000000..14543ca32 --- /dev/null +++ b/skopt/samples/utils.py @@ -0,0 +1,193 @@ +import numpy as np +from sklearn.utils import check_random_state +import math + + +def create_primes(threshold): + """ + Generate prime values using sieve of Eratosthenes method. + + Parameters + ---------- + threshold : int + The upper bound for the size of the prime values. + + Returns + ------ + List + All primes from 2 and up to ``threshold``. 
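+
+    For example (consistent with the accompanying unit tests),
+    ``create_primes(20)`` returns ``[2, 3, 5, 7, 11, 13, 17, 19]``.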
+    """
+    if threshold == 2:
+        return [2]
+
+    elif threshold < 2:
+        return []
+
+    numbers = list(range(3, threshold+1, 2))
+    root_of_threshold = threshold ** 0.5
+    half = int((threshold+1)/2-1)
+    idx = 0
+    counter = 3
+    while counter <= root_of_threshold:
+        if numbers[idx]:
+            idy = int((counter*counter-3)/2)
+            numbers[idy] = 0
+            while idy < half:
+                numbers[idy] = 0
+                idy += counter
+        idx += 1
+        counter = 2*idx+3
+    return [2] + [number for number in numbers if number]
+
+
+def w2_discrepancy_fast(D):
+    r"""Vectorized calculation of the wrap-around L2-discrepancy.
+
+    The formula for the wrap-around L2-discrepancy is taken from Eq. 5
+    of (1):
+
+    .. math::
+
+        WD^2(D) = -\left(\frac{4}{3}\right)^K + \frac{1}{N^2}
+            \sum_{i,j=1}^{N} \prod_{k=1}^{K}
+            \left[\frac{3}{2} - |x_k^i - x_k^j| (1 - |x_k^i - x_k^j|)\right]
+
+    The implementation below uses vectorized numpy array operations to
+    avoid the nested loop of the more straightforward implementation.
+
+    Parameters
+    ----------
+    D : np.array
+        the design matrix
+
+    Returns
+    -------
+    the wrap-around L2-discrepancy
+    """
+
+    n = D.shape[0]  # the number of samples
+    k = D.shape[1]  # the number of dimensions
+    delta = [None] * k
+    for i in range(k):
+        # loop over dimensions to calculate the absolute differences
+        # between points in a given dimension, note the vectorized operation
+        delta[i] = np.abs(D[:, i] - np.reshape(D[:, i], (len(D[:, i]), 1)))
+
+    product = 1.5 - delta[0] * (1 - delta[0])
+    for i in range(1, k):
+        product *= (1.5 - delta[i] * (1 - delta[i]))
+
+    w2_disc = -1 * (4.0/3.0)**k + 1/n**2 * np.sum(product)
+
+    return w2_disc
+
+
+def random_permute_matrix(h, random_state=None):
+    """Randomly permute each column of the matrix `h` independently."""
+    rng = check_random_state(random_state)
+    h_rand_perm = np.zeros_like(h)
+    samples, n = h.shape
+    for j in range(n):
+        order = rng.permutation(range(samples))
+        h_rand_perm[:, j] = h[order, j]
+    return h_rand_perm
+
+
+def _bit_hi1(n):
+    """
+    Returns the position of the high 1 bit base 2 in an integer.
+
+    Parameters
+    ----------
+    n : int
+        input, should be positive
+    """
+    bin_repr = np.binary_repr(n)
+    most_left_one = bin_repr.find('1')
+    if most_left_one == -1:
+        return 0
+    else:
+        return len(bin_repr) - most_left_one
+
+
+def _bit_lo0(n):
+    """
+    Returns the position of the low 0 bit base 2 in an integer.
+
+    Parameters
+    ----------
+    n : int
+        input, should be positive
+    """
+    bin_repr = np.binary_repr(n)
+    most_right_zero = bin_repr[::-1].find('0')
+    if most_right_zero == -1:
+        most_right_zero = len(bin_repr)
+    return most_right_zero + 1
+
+
+def random_shift(dm, random_state=None):
+    """Random shift of a design matrix.
+
+    Randomization of quasi-MC samples can be achieved in the easiest
+    manner by a random shift (also known as the Cranley-Patterson
+    rotation).
+
+    **Reference:**
+    (1) C. Lemieux, "Monte Carlo and Quasi-Monte Carlo Sampling," Springer
+    Series in Statistics 692, Springer Science+Business Media, New York,
+    2009
+
+    Parameters
+    ----------
+    dm : array, shape(n, d)
+        input matrix
+    random_state : int, RandomState instance, or None (default)
+        Set random state to something other than None for reproducible
+        results.
+
+    Returns
+    -------
+    Randomized design matrix
+    """
+    rng = check_random_state(random_state)
+    # Generate random shift matrix from uniform distribution
+    shift = np.repeat(rng.rand(1, dm.shape[1]), dm.shape[0], axis=0)
+    # Return the shifted design
+    return (dm + shift) % 1
+
+
+def calc_num_candidate(n):
+    """Calculate the number of candidates from perturbing the current design.
+
+    The recommendation in the article is the maximum number of pair
+    combinations from a given column, divided by a factor of 5.
+    It is also recommended that the number of candidates to be evaluated
+    does not exceed 50.
+
+    Parameters
+    ----------
+    n : int
+        the number of elements to be permuted
+
+    Returns
+    -------
+    the number of candidates from perturbing the current design
+    column-wise
+    """
+    pairs = math.factorial(n) / math.factorial(n - 2) / math.factorial(2)
+    fac = 5  # The factor recommended in the article
+
+    return min(int(pairs / fac), 50)
+
+
+def calc_max_inner(n, k):
+    r"""Calculate the maximum number of inner iterations,
+
+    .. math::
+
+        \frac{2 \times n_e \times k}{J}
+
+    It is recommended that the number of inner iterations does not
+    exceed 100.
+
+    Parameters
+    ----------
+    n : int
+        the number of samples in the design
+    k : int
+        the number of design dimensions
+
+    Returns
+    -------
+    the maximum number of inner iterations/loops
+    """
+    pairs = math.factorial(n) / math.factorial(n - 2) / math.factorial(2)
+
+    return min(int(2 * pairs * k / calc_num_candidate(n)), 100)
+
+
+class InitialPointGenerator(object):
+    def generate(self, n_dim, n_samples, random_state=None):
+        raise NotImplementedError
diff --git a/skopt/tests/test_samples.py b/skopt/tests/test_samples.py
new file mode 100644
index 000000000..50e4af2ec
--- /dev/null
+++ b/skopt/tests/test_samples.py
@@ -0,0 +1,140 @@
+import pytest
+import numbers
+import numpy as np
+import os
+import yaml
+from tempfile import NamedTemporaryFile
+
+from numpy.testing import assert_array_almost_equal
+from numpy.testing import assert_almost_equal
+from numpy.testing import assert_array_equal
+from numpy.testing import assert_equal
+from numpy.testing import assert_raises_regex
+
+from skopt import Optimizer
+from skopt.space import Space
+from skopt.space import Real
+from skopt.space import Integer
+from skopt.space import Categorical
+from skopt.space import check_dimension as space_check_dimension
+from skopt.samples.sobol import Sobol
+from skopt.samples.utils import _bit_lo0, _bit_hi1, create_primes
+from skopt.samples.halton import _van_der_corput_samples
+from skopt.samples import Hammersly, Halton
+from skopt.samples.lhs import Lhs
+
+
+@pytest.mark.fast_test
+def test_lhs_type():
+    lhs = Lhs(lhs_type="classic")
+    samples = lhs.generate(2, 200)
+    assert len(samples) == 200
+    assert len(samples[0]) == 2
+    lhs = Lhs(lhs_type="centered")
+    samples = lhs.generate(3, 3)
+    assert_almost_equal(np.sum(samples), 4.5)
+
+
+def test_lhs_criterion():
+    for criterion in ["maximin", "ratio", "correlation"]:
+        lhs = Lhs(criterion=criterion, iterations=100)
+        samples = lhs.generate(2, 200)
+        assert len(samples) == 200
+        assert len(samples[0]) == 2
+    lhs = Lhs(criterion="ese", iterations=1)
+    samples = lhs.generate(2, 20)
+    assert len(samples) == 20
+    assert len(samples[0]) == 2
+
+
+@pytest.mark.fast_test
+def test_bit():
+    X = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+    res = [2, 1, 3, 1, 2, 1, 4, 1, 2, 1]
+    for i in range(len(X)):
+        assert _bit_lo0(X[i]) == res[i]
+
+    X = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+    res = [1, 2, 2, 3, 3, 3, 3, 4, 4, 4]
+    for i in range(len(X)):
+        assert _bit_hi1(X[i]) == res[i]
+
+
+@pytest.mark.fast_test
+def test_sobol():
+    sobol = Sobol()
+    x, seed = sobol._sobol(3, 1)
+    assert_array_equal(x, [0.5, 0.5, 0.5])
+    x, seed = sobol._sobol(3, 2)
+    assert_array_equal(x, [0.75, 0.25, 0.75])
+    x, seed = sobol._sobol(3, 3)
+    assert_array_equal(x, [0.25, 0.75, 0.25])
+    x, seed = sobol._sobol(3, 4)
+    assert_array_equal(x, [0.375, 0.375, 0.625])
+    x, seed = sobol._sobol(3, 5)
+    assert_array_equal(x, [0.875, 0.875, 0.125])
+    x, seed = sobol._sobol(3, 6)
+    assert_array_equal(x, [0.625, 0.125, 0.375])
+
+
+@pytest.mark.fast_test
+def test_generate():
+    sobol = Sobol(min_skip=1, max_skip=1)
+    x = sobol.generate(3, 3)
+    assert_array_equal(x[0, :], [0.5, 0.5, 0.5])
+    assert_array_equal(x[1, :], [0.75, 0.25, 0.75])
+    assert_array_equal(x[2, :], [0.25, 0.75, 0.25])
+
+
+@pytest.mark.fast_test
+def test_van_der_corput():
+    x = _van_der_corput_samples(range(11), number_base=10)
+    y = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.01, 0.11]
+    assert_array_equal(x, y)
+
+    x = _van_der_corput_samples(range(8), number_base=2)
+    y = [0.5, 0.25, 0.75, 0.125, 0.625, 0.375, 0.875, 0.0625]
+    assert_array_equal(x, y)
+
+
+@pytest.mark.fast_test
+def test_halton():
+    h = Halton()
+    x = h.generate(2, 3)
+    y = np.array([[0.125, 0.625, 0.375], [0.4444, 0.7778, 0.2222]]).T
+    # `decimal=3` checks agreement to roughly 1e-3
+    assert_array_almost_equal(x, y, decimal=3)
+
+    h = Halton()
+    x = h.generate(2, 4)
+    y = np.array([[0.125, 0.625, 0.375, 0.875],
+                  [0.4444, 0.7778, 0.2222, 0.5556]]).T
+    assert_array_almost_equal(x, y, decimal=3)
+
+    samples = h.generate(2, 200)
+    assert len(samples) == 200
+    assert len(samples[0]) == 2
+
+
+@pytest.mark.fast_test
+def test_hammersly():
+    h = Hammersly()
+    x = h.generate(2, 3)
+    y = np.array([[0.75, 0.125, 0.625], [0.25, 0.5, 0.75]]).T
+    assert_almost_equal(x, y)
+    x = h.generate(2, 4)
+    y = np.array([[0.75, 0.125, 0.625, 0.375], [0.2, 0.4, 0.6, 0.8]]).T
+    assert_almost_equal(x, y)
+
+    samples = h.generate(2, 200)
+    assert len(samples) == 200
+    assert len(samples[0]) == 2
+
+
+@pytest.mark.fast_test
+def test_primes():
+
+    x = create_primes(1)
+    assert_array_equal(x, [])
+    x = create_primes(2)
+    assert_array_equal(x, [2])
+    x = create_primes(3)
+    assert_array_equal(x, [2, 3])
+    x = create_primes(20)
+    assert_array_equal(x, [2, 3, 5, 7, 11, 13, 17, 19])
From bdf8729f0c985669fcae0ba56ce20588e31088e1 Mon Sep 17 00:00:00 2001
From: holgern
Date: Sat, 8 Feb 2020 20:34:15 +0100
Subject: [PATCH 025/265] Add new initial_point_generator parameter to all optimizer classes

---
 skopt/optimizer/base.py      | 31 ++++++++++++
 skopt/optimizer/dummy.py     | 33 +++++++++++-
 skopt/optimizer/forest.py    | 31 ++++++++++++
 skopt/optimizer/gbrt.py      | 31 ++++++++++++
 skopt/optimizer/gp.py        | 32 +++++++++++-
 skopt/optimizer/optimizer.py | 98 +++++++++++++++++++++++++++++++++---
 6 files changed, 247 insertions(+), 9 deletions(-)

diff --git a/skopt/optimizer/base.py b/skopt/optimizer/base.py
index ab7ea40ab..0739755c5 100644
--- a/skopt/optimizer/base.py
+++ b/skopt/optimizer/base.py
@@ -22,6 +22,7 @@
 def base_minimize(func, dimensions, base_estimator,
                   n_calls=100, n_random_starts=10,
+                  initial_point_generator="random",
                   acq_func="EI", acq_optimizer="lbfgs",
                   x0=None, y0=None, random_state=None, verbose=False,
                   callback=None, n_points=10000, n_restarts_optimizer=5,
@@ -67,6 +68,35 @@
     Number of evaluations of `func` with random points before
     approximating it with `base_estimator`.
+ initial_point_generator : str, InitialPointGenerator instance, + default='random' + Sets a initial points generator. Can be either + + - "random" for uniform random numbers, + + - "sobol" for a Sobol sequence, + + - "halton" for a Halton sequence, + + - "hammersly" for a Hammersly sequence, + + - "lhs" for a latin hypercube sequence, + + - "lhs_center" for a centered LHS sequence, + + - "lhs_maximin" for a LHS sequence which is maximized regarding + the minimum distance of all points to each other + + - "lhs_ratio" for a LHS sequence which is maximized regarding + the ratio between the maximum to the minimum distance of all + points to each other + + - "lhs_correlation" for a LHS sequence which is minimized + regarding the correlation coefficients + + - "lhs_ese" for a LHS sequence which is optimized by an enhanced + stochastic evolutionary (ESE) algorithm + acq_func : string, default=`"EI"` Function to minimize over the posterior distribution. Can be either @@ -228,6 +258,7 @@ def base_minimize(func, dimensions, base_estimator, # create optimizer class optimizer = Optimizer(dimensions, base_estimator, n_initial_points=n_initial_points, + initial_point_generator=initial_point_generator, acq_func=acq_func, acq_optimizer=acq_optimizer, random_state=random_state, model_queue_size=model_queue_size, diff --git a/skopt/optimizer/dummy.py b/skopt/optimizer/dummy.py index 7be17b137..c6ecb211c 100644 --- a/skopt/optimizer/dummy.py +++ b/skopt/optimizer/dummy.py @@ -3,7 +3,8 @@ from .base import base_minimize -def dummy_minimize(func, dimensions, n_calls=100, x0=None, y0=None, +def dummy_minimize(func, dimensions, n_calls=100, + initial_point_generator="random", x0=None, y0=None, random_state=None, verbose=False, callback=None, model_queue_size=None): """Random search by uniform sampling within the given bounds. @@ -34,6 +35,35 @@ def dummy_minimize(func, dimensions, n_calls=100, x0=None, y0=None, n_calls : int, default=100 Number of calls to `func` to find the minimum. + initial_point_generator : str, InitialPointGenerator instance, + default='random' + Sets a initial points generator. Can be either + + - "random" for uniform random numbers, + + - "sobol" for a Sobol sequence, + + - "halton" for a Halton sequence, + + - "hammersly" for a Hammersly sequence, + + - "lhs" for a latin hypercube sequence, + + - "lhs_center" for a centered LHS sequence, + + - "lhs_maximin" for a LHS sequence which is maximized regarding + the minimum distance of all points to each other + + - "lhs_ratio" for a LHS sequence which is maximized regarding + the ratio between the maximum to the minimum distance of all + points to each other + + - "lhs_correlation" for a LHS sequence which is minimized + regarding the correlation coefficients + + - "lhs_ese" for a LHS sequence which is optimized by an enhanced + stochastic evolutionary (ESE) algorithm + x0 : list, list of lists or `None` Initial input points. @@ -105,6 +135,7 @@ def dummy_minimize(func, dimensions, n_calls=100, x0=None, y0=None, # minimizer does not provide gradients. 
acq_optimizer="sampling", n_calls=n_calls, n_random_starts=n_random_calls, + initial_point_generator=initial_point_generator, x0=x0, y0=y0, random_state=random_state, verbose=verbose, callback=callback, model_queue_size=model_queue_size) diff --git a/skopt/optimizer/forest.py b/skopt/optimizer/forest.py index c079f941a..98ee3f0be 100644 --- a/skopt/optimizer/forest.py +++ b/skopt/optimizer/forest.py @@ -9,6 +9,7 @@ def forest_minimize(func, dimensions, base_estimator="ET", n_calls=100, n_random_starts=10, acq_func="EI", + initial_point_generator="random", x0=None, y0=None, random_state=None, verbose=False, callback=None, n_points=10000, xi=0.01, kappa=1.96, n_jobs=1, model_queue_size=None): @@ -75,6 +76,35 @@ def forest_minimize(func, dimensions, base_estimator="ET", n_calls=100, Number of evaluations of `func` with random points before approximating it with `base_estimator`. + initial_point_generator : str, InitialPointGenerator instance, + default='random' + Sets a initial points generator. Can be either + + - "random" for uniform random numbers, + + - "sobol" for a Sobol sequence, + + - "halton" for a Halton sequence, + + - "hammersly" for a Hammersly sequence, + + - "lhs" for a latin hypercube sequence, + + - "lhs_center" for a centered LHS sequence, + + - "lhs_maximin" for a LHS sequence which is maximized regarding + the minimum distance of all points to each other + + - "lhs_ratio" for a LHS sequence which is maximized regarding + the ratio between the maximum to the minimum distance of all + points to each other + + - "lhs_correlation" for a LHS sequence which is minimized + regarding the correlation coefficients + + - "lhs_ese" for a LHS sequence which is optimized by an enhanced + stochastic evolutionary (ESE) algorithm + acq_func : string, default="LCB" Function to minimize over the forest posterior. Can be either @@ -169,6 +199,7 @@ def forest_minimize(func, dimensions, base_estimator="ET", n_calls=100, return base_minimize(func, dimensions, base_estimator, n_calls=n_calls, n_points=n_points, n_random_starts=n_random_starts, + initial_point_generator=initial_point_generator, x0=x0, y0=y0, random_state=random_state, acq_func=acq_func, xi=xi, kappa=kappa, verbose=verbose, diff --git a/skopt/optimizer/gbrt.py b/skopt/optimizer/gbrt.py index 00815d10f..0f732b64f 100644 --- a/skopt/optimizer/gbrt.py +++ b/skopt/optimizer/gbrt.py @@ -7,6 +7,7 @@ def gbrt_minimize(func, dimensions, base_estimator=None, n_calls=100, n_random_starts=10, + initial_point_generator="random", acq_func="EI", acq_optimizer="auto", x0=None, y0=None, random_state=None, verbose=False, callback=None, n_points=10000, xi=0.01, kappa=1.96, @@ -61,6 +62,35 @@ def gbrt_minimize(func, dimensions, base_estimator=None, Number of evaluations of `func` with random points before approximating it with `base_estimator`. + initial_point_generator : str, InitialPointGenerator instance, + default='random' + Sets a initial points generator. 
Can be either + + - "random" for uniform random numbers, + + - "sobol" for a Sobol sequence, + + - "halton" for a Halton sequence, + + - "hammersly" for a Hammersly sequence, + + - "lhs" for a latin hypercube sequence, + + - "lhs_center" for a centered LHS sequence, + + - "lhs_maximin" for a LHS sequence which is maximized regarding + the minimum distance of all points to each other + + - "lhs_ratio" for a LHS sequence which is maximized regarding + the ratio between the maximum to the minimum distance of all + points to each other + + - "lhs_correlation" for a LHS sequence which is minimized + regarding the correlation coefficients + + - "lhs_ese" for a LHS sequence which is optimized by an enhanced + stochastic evolutionary (ESE) algorithm + acq_func : string, default=`"LCB"` Function to minimize over the forest posterior. Can be either @@ -159,6 +189,7 @@ def gbrt_minimize(func, dimensions, base_estimator=None, return base_minimize(func, dimensions, base_estimator, n_calls=n_calls, n_points=n_points, n_random_starts=n_random_starts, + initial_point_generator=initial_point_generator, x0=x0, y0=y0, random_state=random_state, xi=xi, kappa=kappa, acq_func=acq_func, verbose=verbose, callback=callback, acq_optimizer="sampling", diff --git a/skopt/optimizer/gp.py b/skopt/optimizer/gp.py index 5d272da4d..607413ff8 100644 --- a/skopt/optimizer/gp.py +++ b/skopt/optimizer/gp.py @@ -10,7 +10,7 @@ def gp_minimize(func, dimensions, base_estimator=None, - n_calls=100, n_random_starts=10, + n_calls=100, n_random_starts=10, initial_point_generator="random", acq_func="gp_hedge", acq_optimizer="auto", x0=None, y0=None, random_state=None, verbose=False, callback=None, n_points=10000, n_restarts_optimizer=5, xi=0.01, kappa=1.96, @@ -87,6 +87,35 @@ def gp_minimize(func, dimensions, base_estimator=None, Number of evaluations of `func` with random points before approximating it with `base_estimator`. + initial_point_generator : str, InitialPointGenerator instance, + default='random' + Sets a initial points generator. Can be either + + - "random" for uniform random numbers, + + - "sobol" for a Sobol sequence, + + - "halton" for a Halton sequence, + + - "hammersly" for a Hammersly sequence, + + - "lhs" for a latin hypercube sequence, + + - "lhs_center" for a centered LHS sequence, + + - "lhs_maximin" for a LHS sequence which is maximized regarding + the minimum distance of all points to each other + + - "lhs_ratio" for a LHS sequence which is maximized regarding + the ratio between the maximum to the minimum distance of all + points to each other + + - "lhs_correlation" for a LHS sequence which is minimized + regarding the correlation coefficients + + - "lhs_ese" for a LHS sequence which is optimized by an enhanced + stochastic evolutionary (ESE) algorithm + acq_func : string, default=`"gp_hedge"` Function to minimize over the gaussian prior. 
Can be either @@ -266,6 +295,7 @@ def gp_minimize(func, dimensions, base_estimator=None, acq_func=acq_func, xi=xi, kappa=kappa, acq_optimizer=acq_optimizer, n_calls=n_calls, n_points=n_points, n_random_starts=n_random_starts, + initial_point_generator=initial_point_generator, n_restarts_optimizer=n_restarts_optimizer, x0=x0, y0=y0, random_state=rng, verbose=verbose, callback=callback, n_jobs=n_jobs, model_queue_size=model_queue_size) diff --git a/skopt/optimizer/optimizer.py b/skopt/optimizer/optimizer.py index 94b122cf5..5e5dd40d7 100644 --- a/skopt/optimizer/optimizer.py +++ b/skopt/optimizer/optimizer.py @@ -66,9 +66,37 @@ class Optimizer(object): n_initial_points : int, default=10 Number of evaluations of `func` with initialization points - before approximating it with `base_estimator`. Points provided as - `x0` count as initialization points. If len(x0) < n_initial_points - additional points are sampled at random. + before approximating it with `base_estimator`. Initial point + generator can be changed by setting `initial_point_generator`. + + initial_point_generator : str, InitialPointGenerator instance, + default='random' + Sets a initial points generator. Can be either + + - "random" for uniform random numbers, + + - "sobol" for a Sobol sequence, + + - "halton" for a Halton sequence, + + - "hammersly" for a Hammersly sequence, + + - "lhs" for a latin hypercube sequence, + + - "lhs_center" for a centered LHS sequence, + + - "lhs_maximin" for a LHS sequence which is maximized regarding + the minimum distance of all points to each other + + - "lhs_ratio" for a LHS sequence which is maximized regarding + the ratio between the maximum to the minimum distance of all + points to each other + + - "lhs_correlation" for a LHS sequence which is minimized + regarding the correlation coefficients + + - "lhs_ese" for a LHS sequence which is optimized by an enhanced + stochastic evolutionary (ESE) algorithm acq_func : string, default=`"gp_hedge"` Function to minimize over the posterior distribution. Can be either @@ -139,8 +167,10 @@ class Optimizer(object): space used to sample points, bounds, and type of parameters. 
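+
+    Examples
+    --------
+    A minimal ask/tell sketch (illustrative; any toy objective works the
+    same way):
+
+    >>> from skopt import Optimizer
+    >>> opt = Optimizer([(-2.0, 2.0)], "GP",
+    ...                 initial_point_generator="sobol")
+    >>> x = opt.ask()
+    >>> res = opt.tell(x, x[0] ** 2)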
""" + def __init__(self, dimensions, base_estimator="gp", n_random_starts=None, n_initial_points=10, + initial_point_generator="random", acq_func="gp_hedge", acq_optimizer="auto", random_state=None, @@ -221,7 +251,7 @@ def __init__(self, dimensions, base_estimator="gp", "'sampling', got {0}".format(acq_optimizer)) if (not has_gradients(self.base_estimator_) and - acq_optimizer != "sampling"): + acq_optimizer != "sampling"): raise ValueError("The regressor {0} should run with " "acq_optimizer" "='sampling'.".format(type(base_estimator))) @@ -245,6 +275,55 @@ def __init__(self, dimensions, base_estimator="gp", dimensions = normalize_dimensions(dimensions) self.space = Space(dimensions) + self._initial_samples = None + self._initial_point_generator = initial_point_generator + if initial_point_generator != "random" and \ + isinstance(initial_point_generator, str): + if initial_point_generator == "sobol": + from skopt.samples import Sobol + self._initial_point_generator = Sobol() + elif initial_point_generator == "halton": + from skopt.samples import Halton + self._initial_point_generator = Halton() + elif initial_point_generator == "hammersly": + from skopt.samples import Hammersly + self._initial_point_generator = Hammersly() + elif initial_point_generator in ["lhs", "lhs_classic"]: + from skopt.samples import Lhs + self._initial_point_generator = Lhs(lhs_type="classic") + elif initial_point_generator == "lhs_centered": + from skopt.samples import Lhs + self._initial_point_generator = Lhs(lhs_type="centered") + elif initial_point_generator == "lhs_maximin": + from skopt.samples import Lhs + self._initial_point_generator = Lhs(criterion="maximin") + elif initial_point_generator == "lhs_ratio": + from skopt.samples import Lhs + self._initial_point_generator = Lhs(criterion="ratio") + elif initial_point_generator == "lhs_correlation": + from skopt.samples import Lhs + self._initial_point_generator = Lhs(criterion="correlation") + elif initial_point_generator == "lhs_ese": + from skopt.samples import Lhs + self._initial_point_generator = Lhs(criterion="ese", + iterations=10) + else: + raise ValueError( + "Unkown initial_point_generator: " + + str(initial_point_generator) + ) + try: + inv_initial_samples = self._initial_point_generator.generate( + self.space.n_dims, n_initial_points, + random_state=random_state) + except: + raise Exception("initial_point_generator is not a valid" + "generator function") + transformer = self.space.get_transformer() + self.space.set_transformer("normalize") + self._initial_samples = self.space.inverse_transform(inv_initial_samples) + self.space.set_transformer(transformer) + # record categorical and non-categorical indices self._cat_inds = [] self._non_cat_inds = [] @@ -282,13 +361,14 @@ def copy(self, random_state=None): dimensions=self.space.dimensions, base_estimator=self.base_estimator_, n_initial_points=self.n_initial_points_, + initial_point_generator=self._initial_point_generator, acq_func=self.acq_func, acq_optimizer=self.acq_optimizer, acq_func_kwargs=self.acq_func_kwargs, acq_optimizer_kwargs=self.acq_optimizer_kwargs, random_state=random_state, ) - + optimizer._initial_samples = self._initial_samples if hasattr(self, "gains_"): optimizer.gains_ = np.copy(self.gains_) @@ -395,7 +475,11 @@ def _ask(self): if self._n_initial_points > 0 or self.base_estimator_ is None: # this will not make a copy of `self.rng` and hence keep advancing # our random state. 
-            return self.space.rvs(random_state=self.rng)[0]
+            if self._initial_samples is None:
+                return self.space.rvs(random_state=self.rng)[0]
+            else:
+                # The samples are evaluated starting from
+                # initial_samples[0]
+                return self._initial_samples[
+                    len(self._initial_samples) - self._n_initial_points]
 
         else:
             if not self.models:
@@ -487,7 +571,7 @@ def _tell(self, x, y, fit=True):
         # after being "told" n_initial_points we switch from sampling
         # random points to using a surrogate model
         if (fit and self._n_initial_points <= 0 and
-            self.base_estimator_ is not None):
+                self.base_estimator_ is not None):
             transformed_bounds = np.array(self.space.transformed_bounds)
             est = clone(self.base_estimator_)
 
From 8c32c61a6858d9c1cdea286772a8eae64c190ad6 Mon Sep 17 00:00:00 2001
From: holgern
Date: Sun, 9 Feb 2020 22:11:19 +0100
Subject: [PATCH 026/265] Show optimization example with own base estimator

---
 ...optimizer-with-different-base-estimator.py | 134 ++++++++++++++++++
 1 file changed, 134 insertions(+)
 create mode 100644 examples/optimizer-with-different-base-estimator.py

diff --git a/examples/optimizer-with-different-base-estimator.py b/examples/optimizer-with-different-base-estimator.py
new file mode 100644
index 000000000..2bc882333
--- /dev/null
+++ b/examples/optimizer-with-different-base-estimator.py
@@ -0,0 +1,134 @@
+"""
+==========================
+Use unique base estimators
+==========================
+
+Sigurd Carlen, September 2019.
+Reformatted by Holger Nahrstaedt 2020
+
+.. currentmodule:: skopt
+
+
+To use a different base_estimator, or a regressor with non-default
+parameters, we can create the regressor object ourselves and pass it to
+the optimizer as base_estimator.
+
+"""
+print(__doc__)
+
+import numpy as np
+np.random.seed(1234)
+import matplotlib.pyplot as plt
+
+
+#############################################################################
+# Toy example
+# -----------
+#
+# Let assume the following noisy function :math:`f`:
+
+noise_level = 0.2
+
+# Our 1D toy problem, this is the function we are trying to
+# minimize
+def objective(X, noise_level=noise_level):
+    return -np.sin(3*X[0]) - X[0]**2 + 0.7*X[0] + noise_level * np.random.randn()
+
+#############################################################################
+
+from skopt.learning import GaussianProcessRegressor
+from skopt.learning.gaussian_process.kernels import ConstantKernel, Matern
+# Gaussian process with Matérn kernel as surrogate model
+m52 = ConstantKernel(1.0) * Matern(length_scale=1.0,
+                                   nu=2.5)
+gpr = GaussianProcessRegressor(kernel=m52, alpha=noise_level**2,
+                               normalize_y=True, noise="gaussian",
+                               n_restarts_optimizer=2
+                               )
+#############################################################################
+
+from skopt import Optimizer
+opt = Optimizer([(-1.0, 2.0)], base_estimator=gpr, n_initial_points=5,
+                acq_optimizer="sampling")
+#############################################################################
+
+x = np.linspace(-1, 2, 400).reshape(-1, 1)
+fx = np.array([objective(x_i, noise_level=0.0) for x_i in x])
+
+#############################################################################
+
+from skopt.acquisition import gaussian_ei
+
+
+def plot_optimizer(opt, x, fx):
+    model = opt.models[-1]
+    x_model = opt.space.transform(x.tolist())
+    # Plot true function.
+ plt.plot(x, fx, "r--", label="True (unknown)") + plt.fill(np.concatenate([x, x[::-1]]), + np.concatenate([fx - 1.9600 * noise_level, + fx[::-1] + 1.9600 * noise_level]), + alpha=.2, fc="r", ec="None") + + # Plot Model(x) + contours + y_pred, sigma = model.predict(x_model, return_std=True) + plt.plot(x, y_pred, "g--", label=r"$\mu(x)$") + plt.fill(np.concatenate([x, x[::-1]]), + np.concatenate([y_pred - 1.9600 * sigma, + (y_pred + 1.9600 * sigma)[::-1]]), + alpha=.2, fc="g", ec="None") + + # Plot sampled points + plt.plot(opt.Xi, opt.yi, + "r.", markersize=8, label="Observations") + + acq = gaussian_ei(x_model, model, y_opt=np.min(opt.yi)) + # shift down to make a better plot + acq = 4 * acq - 2 + plt.plot(x, acq, "b", label="EI(x)") + plt.fill_between(x.ravel(), -2.0, acq.ravel(), alpha=0.3, color='blue') + + # Adjust plot layout + plt.grid() + plt.legend(loc='best') + +############################################################################# + +for i in range(10): + next_x = opt.ask() + print("%.2f next x" % next_x[0]) + f_val = objective(next_x) + r = opt.tell(next_x, f_val) + if i >= 5: + # plt.subplot(5, 1, i-4) + plt.figure() + plt.title("%d" % i) + plot_optimizer(opt, x, fx) + + +############################################################################# + +def plot_convergence(X_sample, Y_sample, n_init=5): + plt.figure(figsize=(12, 3)) + + x = X_sample[n_init:].ravel() + y = Y_sample[n_init:].ravel() + r = range(1, len(x) + 1) + + x_neighbor_dist = [np.abs(a - b) for a, b in zip(x, x[1:])] + y_max_watermark = np.maximum.accumulate(y) + + plt.subplot(1, 2, 1) + plt.plot(r[1:], x_neighbor_dist, 'bo-') + plt.xlabel('Iteration') + plt.ylabel('Distance') + plt.title('Distance between consecutive x\'s') + + plt.subplot(1, 2, 2) + plt.plot(r, y_max_watermark, 'ro-') + plt.xlabel('Iteration') + plt.ylabel('Best Y') + plt.title('Value of best selected sample') + + +plot_convergence(np.array(r.x_iters), -r.func_vals) + From 41468089224d32151505ac5d8052e9db3c6205bc Mon Sep 17 00:00:00 2001 From: holgern Date: Mon, 10 Feb 2020 16:15:36 +0100 Subject: [PATCH 027/265] Add optimizer helper functions and improve example --- examples/bayesian-optimization.py | 4 +- examples/exploration-vs-exploitation.py | 220 ++++++++++++++++++ ...optimizer-with-different-base-estimator.py | 153 +++++++----- skopt/optimizer/optimizer.py | 23 ++ skopt/tests/test_optimizer.py | 11 +- 5 files changed, 349 insertions(+), 62 deletions(-) create mode 100644 examples/exploration-vs-exploitation.py diff --git a/examples/bayesian-optimization.py b/examples/bayesian-optimization.py index 4e7977161..4091b3137 100644 --- a/examples/bayesian-optimization.py +++ b/examples/bayesian-optimization.py @@ -70,7 +70,7 @@ print(__doc__) import numpy as np -np.random.seed(1234) +np.random.seed(237) import matplotlib.pyplot as plt ############################################################################# @@ -114,7 +114,7 @@ def f(x, noise_level=noise_level): n_calls=15, # the number of evaluations of f n_random_starts=5, # the number of random initialization points noise=0.1**2, # the noise level (optional) - random_state=123) # the random seed + random_state=1234) # the random seed ############################################################################# # Accordingly, the approximated minimum is found to be: diff --git a/examples/exploration-vs-exploitation.py b/examples/exploration-vs-exploitation.py new file mode 100644 index 000000000..a311cd335 --- /dev/null +++ b/examples/exploration-vs-exploitation.py @@ -0,0 
+1,220 @@
+"""
+===========================
+Exploration vs exploitation
+===========================
+
+Sigurd Carlen, September 2019.
+Reformatted by Holger Nahrstaedt 2020
+
+.. currentmodule:: skopt
+
+
+We can control how much the acquisition function favors exploration and
+exploitation by tweaking the two parameters kappa and xi. Higher values
+mean more exploration and less exploitation, and vice versa with low
+values.
+
+kappa is only used if acq_func is set to "LCB". xi is used when acq_func
+is "EI" or "PI". By default the acquisition function is set to "gp_hedge",
+which chooses the best of these three. Therefore I recommend not using
+gp_hedge when tweaking exploration/exploitation, but instead choosing
+"LCB", "EI" or "PI".
+
+The way to pass kappa and xi to the optimizer is to use the named argument
+"acq_func_kwargs". This is a dict of extra arguments for the acquisition
+function.
+
+If you want opt.ask() to give a new acquisition value immediately after
+tweaking kappa or xi, call opt.update_next(). This ensures that the next
+value is updated with the new acquisition parameters.
+
+"""
+print(__doc__)
+
+import numpy as np
+np.random.seed(1234)
+import matplotlib.pyplot as plt
+
+
+#############################################################################
+# Toy example
+# -----------
+# First we define our objective like in the ask-and-tell example notebook
+# and define a plotting function. We do, however, only use one initial
+# random point. All points after the first one are therefore chosen by the
+# acquisition function.
+
+from skopt.learning import ExtraTreesRegressor
+from skopt import Optimizer
+
+noise_level = 0.1
+
+# Our 1D toy problem, this is the function we are trying to
+# minimize
+def objective(x, noise_level=noise_level):
+    return np.sin(5 * x[0]) * (1 - np.tanh(x[0] ** 2)) +\
+        np.random.randn() * noise_level
+
+#############################################################################
+
+opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=1,
+                acq_optimizer="sampling")
+
+#############################################################################
+
+x = np.linspace(-2, 2, 400).reshape(-1, 1)
+fx = np.array([objective(x_i, noise_level=0.0) for x_i in x])
+
+#############################################################################
+
+from skopt.acquisition import gaussian_ei
+
+
+def plot_optimizer(opt, x, fx):
+    model = opt.models[-1]
+    x_model = opt.space.transform(x.tolist())
+
+    # Plot true function.
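+    # (the dashed line is the noise-free objective; the shaded band is
+    # +/- 1.96 * noise_level, i.e. roughly 95% of the observation noise)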
+    plt.plot(x, fx, "r--", label="True (unknown)")
+    plt.fill(np.concatenate([x, x[::-1]]),
+             np.concatenate([fx - 1.9600 * noise_level,
+                             fx[::-1] + 1.9600 * noise_level]),
+             alpha=.2, fc="r", ec="None")
+
+    # Plot Model(x) + contours
+    y_pred, sigma = model.predict(x_model, return_std=True)
+    plt.plot(x, y_pred, "g--", label=r"$\mu(x)$")
+    plt.fill(np.concatenate([x, x[::-1]]),
+             np.concatenate([y_pred - 1.9600 * sigma,
+                             (y_pred + 1.9600 * sigma)[::-1]]),
+             alpha=.2, fc="g", ec="None")
+
+    # Plot sampled points
+    plt.plot(opt.Xi, opt.yi,
+             "r.", markersize=8, label="Observations")
+
+    acq = gaussian_ei(x_model, model, y_opt=np.min(opt.yi))
+    # shift down to make a better plot
+    acq = 4 * acq - 2
+    plt.plot(x, acq, "b", label="EI(x)")
+    plt.fill_between(x.ravel(), -2.0, acq.ravel(), alpha=0.3, color='blue')
+
+    # Adjust plot layout
+    plt.grid()
+    plt.legend(loc='best')
+
+#############################################################################
+# We run an optimization loop with standard settings.
+
+for i in range(30):
+    next_x = opt.ask()
+    f_val = objective(next_x)
+    opt.tell(next_x, f_val)
+# The same output could be created with opt.run(objective, n_iter=30)
+plot_optimizer(opt, x, fx)
+
+#############################################################################
+# We see that a minimum is found and "exploited".
+#
+# Now let's try to set kappa and xi to other values and
+# pass them to the optimizer:
+acq_func_kwargs = {"xi": 10000, "kappa": 10000}
+#############################################################################
+
+opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=1,
+                acq_optimizer="sampling",
+                acq_func_kwargs=acq_func_kwargs)
+#############################################################################
+opt.run(objective, n_iter=20)
+plot_optimizer(opt, x, fx)
+#############################################################################
+# We see that the points are more random now.
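+#
+# With kappa and xi this large, the uncertainty term dominates the
+# acquisition value, so the optimizer essentially picks the point with
+# the largest predictive uncertainty, which behaves almost like uniform
+# random sampling.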
+#
+# This works both for kappa when using acq_func="LCB":
+
+#############################################################################
+opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=1,
+                acq_func="LCB", acq_optimizer="sampling",
+                acq_func_kwargs=acq_func_kwargs)
+#############################################################################
+opt.run(objective, n_iter=20)
+plot_optimizer(opt, x, fx)
+#############################################################################
+# And for xi when using acq_func="EI" or acq_func="PI":
+
+#############################################################################
+
+opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=1,
+                acq_func="PI", acq_optimizer="sampling",
+                acq_func_kwargs=acq_func_kwargs)
+#############################################################################
+opt.run(objective, n_iter=20)
+plot_optimizer(opt, x, fx)
+#############################################################################
+# We can also favor exploitation:
+acq_func_kwargs = {"xi": 0.000001, "kappa": 0.001}
+
+#############################################################################
+opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=1,
+                acq_func="LCB", acq_optimizer="sampling",
+                acq_func_kwargs=acq_func_kwargs)
+#############################################################################
+opt.run(objective, n_iter=20)
+plot_optimizer(opt, x, fx)
+#############################################################################
+opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=1,
+                acq_func="EI", acq_optimizer="sampling",
+                acq_func_kwargs=acq_func_kwargs)
+#############################################################################
+opt.run(objective, n_iter=20)
+plot_optimizer(opt, x, fx)
+#############################################################################
+opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=1,
+                acq_func="PI", acq_optimizer="sampling",
+                acq_func_kwargs=acq_func_kwargs)
+#############################################################################
+opt.run(objective, n_iter=20)
+plot_optimizer(opt, x, fx)
+
+#############################################################################
+# Note that negative values do not work with the "PI" acquisition function,
+# but they do with "EI":
+acq_func_kwargs = {"xi": -1000000000000}
+#############################################################################
+
+opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=1,
+                acq_func="PI", acq_optimizer="sampling",
+                acq_func_kwargs=acq_func_kwargs)
+#############################################################################
+opt.run(objective, n_iter=20)
+plot_optimizer(opt, x, fx)
+#############################################################################
+opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=1,
+                acq_func="EI", acq_optimizer="sampling",
+                acq_func_kwargs=acq_func_kwargs)
+#############################################################################
+opt.run(objective, n_iter=20)
+plot_optimizer(opt, x, fx)
+#############################################################################
+# Changing kappa and xi on the go
+# -------------------------------
+# If we want to change kappa or xi at any point during our optimization
+# process, we just replace opt.acq_func_kwargs. Remember to call
+# `opt.update_next()` after the change, in order for the next point to be
+# recalculated.
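+# Without the call to `opt.update_next()`, `ask()` would still return the
+# point that was computed with the old kappa/xi values.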
+acq_func_kwargs = {"kappa": 0} +############################################################################# +opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=1, + acq_func="LCB", acq_optimizer="sampling", + acq_func_kwargs=acq_func_kwargs) +############################################################################# +opt.acq_func_kwargs +############################################################################# +opt.run(objective, n_iter=20) +plot_optimizer(opt, x, fx) +############################################################################# +acq_func_kwargs = {"kappa": 100000} +############################################################################# + +opt.acq_func_kwargs = acq_func_kwargs +opt.update_next() +############################################################################# +opt.run(objective, n_iter=20) +plot_optimizer(opt, x, fx) diff --git a/examples/optimizer-with-different-base-estimator.py b/examples/optimizer-with-different-base-estimator.py index 2bc882333..fc771a619 100644 --- a/examples/optimizer-with-different-base-estimator.py +++ b/examples/optimizer-with-different-base-estimator.py @@ -1,7 +1,7 @@ """ -========================== -Use unique base estimators -========================== +============================================== +Use different base estimators for optimization +============================================== Sigurd Carlen, September 2019. Reformatted by Holger Nahrstaedt 2020 @@ -26,109 +26,144 @@ # # Let assume the following noisy function :math:`f`: -noise_level = 0.2 +noise_level = 0.1 # Our 1D toy problem, this is the function we are trying to # minimize -def objective(X, noise_level=noise_level): - return -np.sin(3*X[0]) - X[0]**2 + 0.7*X[0] + noise_level * np.random.randn() -############################################################################# +def objective(x, noise_level=noise_level): + return np.sin(5 * x[0]) * (1 - np.tanh(x[0] ** 2))\ + + np.random.randn() * noise_level -from skopt.learning import GaussianProcessRegressor -from skopt.learning.gaussian_process.kernels import ConstantKernel, Matern -# Gaussian process with Matérn kernel as surrogate model -m52 = ConstantKernel(1.0) * Matern(length_scale=1.0, - nu=2.5) -gpr = GaussianProcessRegressor(kernel=m52, alpha=noise_level**2, - normalize_y=True, noise="gaussian", - n_restarts_optimizer=2 - ) ############################################################################# from skopt import Optimizer -opt = Optimizer([(-1.0, 2.0)], base_estimator=gpr, n_initial_points=5, - acq_optimizer="sampling") +opt_gp = Optimizer([(-2.0, 2.0)], base_estimator="GP", n_initial_points=5, + acq_optimizer="sampling", random_state=42) + ############################################################################# -x = np.linspace(-1, 2, 400).reshape(-1, 1) +x = np.linspace(-2, 2, 400).reshape(-1, 1) fx = np.array([objective(x_i, noise_level=0.0) for x_i in x]) ############################################################################# from skopt.acquisition import gaussian_ei +def plot_optimizer(res, next_x, x, fx, n_iter, max_iters=5): + x_gp = res.space.transform(x.tolist()) + gp = res.models[-1] + curr_x_iters = res.x_iters + curr_func_vals = res.func_vals -def plot_optimizer(opt, x, fx): - model = opt.models[-1] - x_model = opt.space.transform(x.tolist()) # Plot true function. 
+ ax = plt.subplot(max_iters, 2, 2 * n_iter + 1) plt.plot(x, fx, "r--", label="True (unknown)") plt.fill(np.concatenate([x, x[::-1]]), np.concatenate([fx - 1.9600 * noise_level, fx[::-1] + 1.9600 * noise_level]), alpha=.2, fc="r", ec="None") - - # Plot Model(x) + contours - y_pred, sigma = model.predict(x_model, return_std=True) - plt.plot(x, y_pred, "g--", label=r"$\mu(x)$") + if n_iter < max_iters - 1: + ax.get_xaxis().set_ticklabels([]) + # Plot GP(x) + contours + y_pred, sigma = gp.predict(x_gp, return_std=True) + plt.plot(x, y_pred, "g--", label=r"$\mu_{GP}(x)$") plt.fill(np.concatenate([x, x[::-1]]), np.concatenate([y_pred - 1.9600 * sigma, (y_pred + 1.9600 * sigma)[::-1]]), alpha=.2, fc="g", ec="None") # Plot sampled points - plt.plot(opt.Xi, opt.yi, + plt.plot(curr_x_iters, curr_func_vals, "r.", markersize=8, label="Observations") + plt.title(r"x* = %.4f, f(x*) = %.4f" % (res.x[0], res.fun)) + # Adjust plot layout + plt.grid() + + if n_iter == 0: + plt.legend(loc="best", prop={'size': 6}, numpoints=1) + + if n_iter != 4: + plt.tick_params(axis='x', which='both', bottom='off', + top='off', labelbottom='off') - acq = gaussian_ei(x_model, model, y_opt=np.min(opt.yi)) - # shift down to make a better plot - acq = 4 * acq - 2 + # Plot EI(x) + ax = plt.subplot(max_iters, 2, 2 * n_iter + 2) + acq = gaussian_ei(x_gp, gp, y_opt=np.min(curr_func_vals)) plt.plot(x, acq, "b", label="EI(x)") plt.fill_between(x.ravel(), -2.0, acq.ravel(), alpha=0.3, color='blue') + if n_iter < max_iters - 1: + ax.get_xaxis().set_ticklabels([]) + + next_acq = gaussian_ei(res.space.transform([next_x]), gp, + y_opt=np.min(curr_func_vals)) + plt.plot(next_x, next_acq, "bo", markersize=6, label="Next query point") + # Adjust plot layout + plt.ylim(0, 0.07) plt.grid() - plt.legend(loc='best') + if n_iter == 0: + plt.legend(loc="best", prop={'size': 6}, numpoints=1) + + if n_iter != 4: + plt.tick_params(axis='x', which='both', bottom='off', + top='off', labelbottom='off') ############################################################################# +# GP kernel +# --------- +fig = plt.figure() +fig.suptitle("Standard GP kernel") for i in range(10): - next_x = opt.ask() - print("%.2f next x" % next_x[0]) + next_x = opt_gp.ask() f_val = objective(next_x) - r = opt.tell(next_x, f_val) + res = opt_gp.tell(next_x, f_val) if i >= 5: - # plt.subplot(5, 1, i-4) - plt.figure() - plt.title("%d" % i) - plot_optimizer(opt, x, fx) - + plot_optimizer(res, opt_gp._next_x, x, fx, n_iter=i-5, max_iters=5) +plt.tight_layout(rect=[0, 0.03, 1, 0.95]) +plt.plot() ############################################################################# +# Test different kernels +# ---------------------- -def plot_convergence(X_sample, Y_sample, n_init=5): - plt.figure(figsize=(12, 3)) - - x = X_sample[n_init:].ravel() - y = Y_sample[n_init:].ravel() - r = range(1, len(x) + 1) - - x_neighbor_dist = [np.abs(a - b) for a, b in zip(x, x[1:])] - y_max_watermark = np.maximum.accumulate(y) +from skopt.learning import GaussianProcessRegressor +from skopt.learning.gaussian_process.kernels import ConstantKernel, Matern +# Gaussian process with Matérn kernel as surrogate model - plt.subplot(1, 2, 1) - plt.plot(r[1:], x_neighbor_dist, 'bo-') - plt.xlabel('Iteration') - plt.ylabel('Distance') - plt.title('Distance between consecutive x\'s') +from sklearn.gaussian_process.kernels import (RBF, Matern, RationalQuadratic, + ExpSineSquared, DotProduct, + ConstantKernel) - plt.subplot(1, 2, 2) - plt.plot(r, y_max_watermark, 'ro-') - plt.xlabel('Iteration') - 
plt.ylabel('Best Y') - plt.title('Value of best selected sample') +kernels = [1.0 * RBF(length_scale=1.0, length_scale_bounds=(1e-1, 10.0)), + 1.0 * RationalQuadratic(length_scale=1.0, alpha=0.1), + 1.0 * ExpSineSquared(length_scale=1.0, periodicity=3.0, + length_scale_bounds=(0.1, 10.0), + periodicity_bounds=(1.0, 10.0)), + ConstantKernel(0.1, (0.01, 10.0)) + * (DotProduct(sigma_0=1.0, sigma_0_bounds=(0.1, 10.0)) ** 2), + 1.0 * Matern(length_scale=1.0, length_scale_bounds=(1e-1, 10.0), + nu=2.5)] -plot_convergence(np.array(r.x_iters), -r.func_vals) +############################################################################# +for kernel in kernels: + gpr = GaussianProcessRegressor(kernel=kernel, alpha=noise_level ** 2, + normalize_y=True, noise="gaussian", + n_restarts_optimizer=2 + ) + opt = Optimizer([(-2.0, 2.0)], base_estimator=gpr, n_initial_points=5, + acq_optimizer="sampling", random_state=42) + fig = plt.figure() + fig.suptitle(repr(kernel)) + for i in range(10): + next_x = opt.ask() + f_val = objective(next_x) + res = opt.tell(next_x, f_val) + if i >= 5: + plot_optimizer(res, opt._next_x, x, fx, n_iter=i - 5, max_iters=5) + plt.tight_layout(rect=[0, 0.03, 1, 0.95]) + plt.show() diff --git a/skopt/optimizer/optimizer.py b/skopt/optimizer/optimizer.py index 94b122cf5..e485b525f 100644 --- a/skopt/optimizer/optimizer.py +++ b/skopt/optimizer/optimizer.py @@ -604,3 +604,26 @@ def run(self, func, n_iter=1): return create_result(self.Xi, self.yi, self.space, self.rng, models=self.models) + + def update_next(self): + """Updates the value returned by opt.ask(). Useful if a parameter + was updated after ask was called.""" + self.cache_ = {} + # Ask for a new next_x. + # We only need to overwrite _next_x if it exists. + if hasattr(self, '_next_x'): + opt = self.copy(random_state=self.rng) + self._next_x = opt._next_x + + def get_result(self): + """Returns the same result that would be returned by opt.tell() + but without calling tell + + Returns + ------- + res : `OptimizeResult`, scipy object + OptimizeResult instance with the required information. + + """ + return create_result(self.Xi, self.yi, self.space, self.rng, + models=self.models) diff --git a/skopt/tests/test_optimizer.py b/skopt/tests/test_optimizer.py index 9a5f0f173..d19e6dafa 100644 --- a/skopt/tests/test_optimizer.py +++ b/skopt/tests/test_optimizer.py @@ -42,6 +42,8 @@ def test_multiple_asks(): assert_equal(len(opt.models), 3) assert_equal(len(opt.Xi), 3) assert_equal(opt.ask(), opt.ask()) + opt.update_next() + assert_equal(opt.ask(), opt.ask()) @pytest.mark.fast_test @@ -166,7 +168,8 @@ def test_dimension_checking_2D_multiple_points(): assert "dimensions as the space" in str(e.value) # within bounds but one dimension too much with pytest.raises(ValueError) as e: - opt.tell([[low + 1, low + 1, low + 1], [low + 1, low + 2], [low + 1, low + 3]], 2.) + opt.tell([[low + 1, low + 1, low + 1], [low + 1, low + 2], + [low + 1, low + 3]], 2.) 
assert "dimensions as the space" in str(e.value) @@ -312,6 +315,8 @@ def test_defaults_are_equivalent(): x = opt.ask() res_opt = opt.tell(x, branin(x)) + + #res_min = forest_minimize(branin, space, n_calls=12, random_state=1) res_min = gp_minimize(branin, space, n_calls=12, random_state=1) @@ -319,3 +324,7 @@ def test_defaults_are_equivalent(): # tolerate small differences in the points sampled assert np.allclose(res_min.x_iters, res_opt.x_iters)#, atol=1e-5) assert np.allclose(res_min.x, res_opt.x)#, atol=1e-5) + + res_opt2 = opt.get_result() + assert np.allclose(res_min.x_iters, res_opt2.x_iters) # , atol=1e-5) + assert np.allclose(res_min.x, res_opt2.x) # , atol=1e-5) From ffb25fd36c7d1ff41819ab6dfb4328e2fe23f4d8 Mon Sep 17 00:00:00 2001 From: holgern Date: Mon, 10 Feb 2020 17:11:28 +0100 Subject: [PATCH 028/265] Add unit tests and add random_state to evaluate_min_params --- skopt/plots.py | 9 +++--- skopt/tests/test_plots.py | 61 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 4 deletions(-) diff --git a/skopt/plots.py b/skopt/plots.py index 465c54ddd..a6e39494a 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -434,7 +434,7 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, n_samples : int, default=250 Number of random samples to use for averaging the model function - at each of the `n_points`. + at each of the `n_points` when `samples` is set to 'random'. size : float, default=2 Height (in inches) of each facet. @@ -698,7 +698,8 @@ def expected_min_random_sampling(model, space, n_samples=100000): def evaluate_min_params(result, params='result', - expected_minimum_samples=None): + expected_minimum_samples=None, + random_state=None): x_vals = None space = result.space if isinstance(params, str): @@ -716,11 +717,11 @@ def evaluate_min_params(result, params='result', # expected_minimum_samples has been parsed x_vals, _ = expected_minimum(result, n_random_starts=expected_minimum_samples, - random_state=None) + random_state=random_state) else: # Use standard of 20 random starting points x_vals, _ = expected_minimum(result, n_random_starts=20, - random_state=None) + random_state=random_state) elif params == 'expected_minimum_random': # Do a minimum search by evaluating the function with # n_samples sample values diff --git a/skopt/tests/test_plots.py b/skopt/tests/test_plots.py index a309de97c..b2dd86ab9 100644 --- a/skopt/tests/test_plots.py +++ b/skopt/tests/test_plots.py @@ -8,6 +8,9 @@ from skopt.space import Integer, Categorical from skopt import plots, gp_minimize import matplotlib.pyplot as plt +from skopt.benchmarks import bench3 +from skopt import expected_minimum +from skopt.plots import evaluate_min_params def save_axes(ax, filename): @@ -42,9 +45,67 @@ def objective(params): plots.plot_objective(res) plots.plot_objective(res, minimum='expected_minimum_random') + plots.plot_objective(res, + samples='expected_minimum_random', + expected_minimum_samples=10) plots.plot_objective(res, samples='result') plots.plot_regret(res) # TODO: Compare plots to known good results? # Look into how matplotlib does this. 
+
+
+@pytest.mark.slow_test
+def test_plots_work_without_cat():
+    """Basic smoke tests to make sure plotting doesn't crash."""
+    SPACE = [
+        Integer(1, 20, name='max_depth'),
+        Integer(2, 100, name='min_samples_split'),
+        Integer(5, 30, name='min_samples_leaf'),
+        Integer(1, 30, name='max_features'),
+    ]
+
+    def objective(params):
+        clf = DecisionTreeClassifier(random_state=3,
+                                     **{dim.name: val
+                                        for dim, val in zip(SPACE, params)
+                                        if dim.name != 'dummy'})
+        return -np.mean(cross_val_score(clf, *load_breast_cancer(True)))
+
+    res = gp_minimize(objective, SPACE, n_calls=10, random_state=3)
+    plots.plot_convergence(res)
+    plots.plot_evaluations(res)
+    plots.plot_objective(res)
+    plots.plot_objective(res,
+                         minimum='expected_minimum')
+    plots.plot_objective(res,
+                         samples='expected_minimum',
+                         expected_minimum_samples=10)
+    plots.plot_objective(res,
+                         samples='result')
+    plots.plot_regret(res)
+
+    # TODO: Compare plots to known good results?
+    # Look into how matplotlib does this.
+
+
+@pytest.mark.fast_test
+def test_evaluate_min_params():
+    res = gp_minimize(bench3,
+                      [(-2.0, 2.0)],
+                      x0=[0.],
+                      noise=1e-8,
+                      n_calls=8,
+                      n_random_starts=3,
+                      random_state=1)
+
+    x_min, f_min = expected_minimum(res, random_state=1)
+
+    assert evaluate_min_params(res, params='result') == res.x
+    assert evaluate_min_params(res, params=[1.]) == [1.]
+    assert evaluate_min_params(res, params='expected_minimum',
+                               random_state=1) == x_min
+    assert evaluate_min_params(res, params='expected_minimum',
+                               expected_minimum_samples=20,
+                               random_state=1) == x_min

From d4342ad0395928d6a59b3649bf39b2df4d09294d Mon Sep 17 00:00:00 2001
From: holgern
Date: Mon, 10 Feb 2020 17:16:11 +0100
Subject: [PATCH 029/265] Try to fix pep8

---
 skopt/plots.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/skopt/plots.py b/skopt/plots.py
index a6e39494a..ef717eab4 100644
--- a/skopt/plots.py
+++ b/skopt/plots.py
@@ -591,8 +591,8 @@ def plot_evaluations(result, bins=20, dimensions=None):
                 bins_ = np.logspace(np.log10(low), np.log10(high), bins)
             else:
                 bins_ = bins
-            ax[i, i].hist(samples[:, j], bins=bins_, range=None if iscat[j]
-                          else space.dimensions[j].bounds)
+            ax[i, i].hist(samples[:, j], bins=bins_, range=None
+                          if iscat[j] else space.dimensions[j].bounds)
 
         # lower triangle
         elif i > j:
@@ -716,8 +716,8 @@ def evaluate_min_params(result, params='result',
             # If a value for
             # expected_minimum_samples has been passed
             x_vals, _ = expected_minimum(result,
-                                         n_random_starts=expected_minimum_samples,
-                                         random_state=random_state)
+                             n_random_starts=expected_minimum_samples,
+                             random_state=random_state)
         else:   # Use standard of 20 random starting points
             x_vals, _ = expected_minimum(result, n_random_starts=20,
                                          random_state=random_state)
@@ -729,13 +729,12 @@ def evaluate_min_params(result, params='result',
             # If a value for
             # expected_minimum_samples has been passed
             x_vals = expected_min_random_sampling(result.models[-1],
-                                                  space,
-                                                  n_samples=expected_minimum_samples)
+                         space, n_samples=expected_minimum_samples)
         else:
            # Use standard of 10^n_parameters. Note this
           # becomes very slow for many parameters
            x_vals = expected_min_random_sampling(result.models[-1], space,
-                                                 n_samples=10 ** len(result.x))
+                         n_samples=10 ** len(result.x))
    else:
        raise ValueError('Argument `eval_min_params` must be a valid '
                         'string (`result`)')

From b0e3a58c186167b1011cdbaca292804160a3f335 Mon Sep 17 00:00:00 2001
From: holgern
Date: Mon, 10 Feb 2020 17:18:15 +0100
Subject: [PATCH 030/265] Fix pep8

---
 skopt/plots.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/skopt/plots.py b/skopt/plots.py
index ef717eab4..d2ae937fb 100644
--- a/skopt/plots.py
+++ b/skopt/plots.py
@@ -591,8 +591,9 @@ def plot_evaluations(result, bins=20, dimensions=None):
                 bins_ = np.logspace(np.log10(low), np.log10(high), bins)
             else:
                 bins_ = bins
-            ax[i, i].hist(samples[:, j], bins=bins_, range=None
-                          if iscat[j] else space.dimensions[j].bounds)
+            ax[i, i].hist(
+                samples[:, j], bins=bins_,
+                range=None if iscat[j] else space.dimensions[j].bounds)
 
         # lower triangle
         elif i > j:
@@ -715,7 +716,8 @@ def evaluate_min_params(result, params='result',
         if expected_minimum_samples:
             # If a value for
             # expected_minimum_samples has been passed
-            x_vals, _ = expected_minimum(result,
+            x_vals, _ = expected_minimum(
+                result,
                 n_random_starts=expected_minimum_samples,
                 random_state=random_state)
         else:   # Use standard of 20 random starting points
@@ -728,12 +730,14 @@ def evaluate_min_params(result, params='result',
         if expected_minimum_samples:
             # If a value for
             # expected_minimum_samples has been passed
-            x_vals = expected_min_random_sampling(result.models[-1],
+            x_vals = expected_min_random_sampling(
+                result.models[-1],
                 space, n_samples=expected_minimum_samples)
         else:
             # Use standard of 10^n_parameters. Note this
             # becomes very slow for many parameters
-            x_vals = expected_min_random_sampling(result.models[-1], space,
+            x_vals = expected_min_random_sampling(
+                result.models[-1], space,
                 n_samples=10 ** len(result.x))
     else:
         raise ValueError('Argument `eval_min_params` must be a valid '
                          'string (`result`)')

From e0c86185c9ad5b667a17cfcf3b3470a4a719cdab Mon Sep 17 00:00:00 2001
From: holgern
Date: Tue, 11 Feb 2020 11:39:45 +0100
Subject: [PATCH 031/265] Fix integer Normalize

* Add unit tests to assure correction of fix

---
 skopt/space/transformers.py      | 24 +++++++++++++++++++-----
 skopt/tests/test_transformers.py | 30 ++++++++++++++++++++++++++++--
 skopt/tests/test_utils.py        |  5 ++++-
 3 files changed, 51 insertions(+), 8 deletions(-)

diff --git a/skopt/space/transformers.py b/skopt/space/transformers.py
index dd7512d76..8b2060ceb 100644
--- a/skopt/space/transformers.py
+++ b/skopt/space/transformers.py
@@ -174,11 +174,25 @@ def __init__(self, low, high, is_int=False):
 
     def transform(self, X):
         X = np.asarray(X)
-        if np.any(X > self.high + 1e-8):
-            raise ValueError("All values should be less than %f" % self.high)
-        if np.any(X < self.low - 1e-8):
-            raise ValueError("All values should be greater than %f" % self.low)
-        return (X - self.low) / (self.high - self.low)
+        if self.is_int:
+            if np.any(np.round(X) > self.high):
+                raise ValueError("All integer values should "
+                                 "be less than %f" % self.high)
+            if np.any(np.round(X) < self.low):
+                raise ValueError("All integer values should "
+                                 "be greater than %f" % self.low)
+        else:
+            if np.any(X > self.high + 1e-8):
+                raise ValueError("All values should "
+                                 "be less than %f" % self.high)
+            if np.any(X < self.low - 1e-8):
+                raise ValueError("All values should "
+                                 "be greater than %f" % self.low)
+        if self.is_int:
+            return (np.round(X).astype(np.int) - self.low) /\
+
(self.high - self.low) + else: + return (X - self.low) / (self.high - self.low) def inverse_transform(self, X): X = np.asarray(X) diff --git a/skopt/tests/test_transformers.py b/skopt/tests/test_transformers.py index 66a210ccc..48dd9f129 100644 --- a/skopt/tests/test_transformers.py +++ b/skopt/tests/test_transformers.py @@ -1,11 +1,11 @@ import pytest import numbers import numpy as np -from numpy.testing import assert_array_almost_equal +from numpy.testing import assert_raises from numpy.testing import assert_array_equal from numpy.testing import assert_equal from numpy.testing import assert_raises_regex -from skopt.space import LogN +from skopt.space import LogN, Normalize @pytest.mark.fast_test @@ -23,3 +23,29 @@ def test_logn10_integer(): for X in range(2, 31): X_orig = transformer.inverse_transform(transformer.transform(X)) assert_array_equal(int(np.round(X_orig)), X) + + +@pytest.mark.fast_test +def test_normalize_integer(): + transformer = Normalize(1, 20, is_int=True) + assert transformer.transform(19.8) == 1.0 + assert transformer.transform(20.2) == 1.0 + assert transformer.transform(1.2) == 0.0 + assert transformer.transform(0.9) == 0.0 + assert_raises(ValueError, transformer.transform, 20.6) + assert_raises(ValueError, transformer.transform, 0.4) + + assert transformer.inverse_transform(0.99) == 20 + assert transformer.inverse_transform(0.01) == 1 + assert_raises(ValueError, transformer.inverse_transform, 1. + 1e-8) + assert_raises(ValueError, transformer.transform, 0. - 1e-8) + +@pytest.mark.fast_test +def test_normalize(): + transformer = Normalize(1, 20, is_int=False) + assert transformer.transform(20.) == 1.0 + assert transformer.transform(1.) == 0.0 + assert_raises(ValueError, transformer.transform, 20. + 1e-7) + assert_raises(ValueError, transformer.transform, 1.0 - 1e-7) + assert_raises(ValueError, transformer.inverse_transform, 1. + 1e-8) + assert_raises(ValueError, transformer.transform, 0. 
- 1e-8) diff --git a/skopt/tests/test_utils.py b/skopt/tests/test_utils.py index 231679304..ddbdc8504 100644 --- a/skopt/tests/test_utils.py +++ b/skopt/tests/test_utils.py @@ -5,7 +5,7 @@ from numpy.testing import assert_equal import numpy as np -from skopt import gp_minimize +from skopt import gp_minimize, forest_minimize from skopt import load from skopt import dump from skopt import expected_minimum @@ -22,6 +22,9 @@ from skopt.utils import normalize_dimensions from skopt.utils import use_named_args from skopt.space import Real, Integer, Categorical +from sklearn.datasets import load_breast_cancer +from sklearn.tree import DecisionTreeClassifier +from sklearn.model_selection import cross_val_score def check_optimization_results_equality(res_1, res_2): From 5185088e899ef8fd598444b59844fdaac4a9c0a9 Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 11 Feb 2020 13:35:23 +0100 Subject: [PATCH 032/265] Add expected_minimum_random_sampling * Can be used with categorical spaces * Adapt unit tests * Refactor parameter and function names --- doc/modules/classes.rst | 2 + ...artial-dependence-plot-with-categorical.py | 97 +++++++++++++++++++ examples/partial-dependence-plot.py | 31 +++--- skopt/__init__.py | 1 + skopt/plots.py | 89 +++++++++-------- skopt/tests/test_plots.py | 45 +++++---- skopt/tests/test_space.py | 11 +++ skopt/tests/test_transformers.py | 1 + skopt/tests/test_utils.py | 20 +++- skopt/utils.py | 56 ++++++++++- 10 files changed, 276 insertions(+), 77 deletions(-) create mode 100644 examples/partial-dependence-plot-with-categorical.py diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index a43b295ef..a411f2703 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -33,6 +33,7 @@ Functions dummy_minimize dump expected_minimum + expected_minimum_random_sampling forest_minimize gbrt_minimize gp_minimize @@ -213,6 +214,7 @@ details. utils.cook_estimator utils.dimensions_aslist utils.expected_minimum + utils.expected_minimum_random_sampling utils.dump utils.load utils.point_asdict diff --git a/examples/partial-dependence-plot-with-categorical.py b/examples/partial-dependence-plot-with-categorical.py new file mode 100644 index 000000000..0e2f71e33 --- /dev/null +++ b/examples/partial-dependence-plot-with-categorical.py @@ -0,0 +1,97 @@ +""" +================================================= +Partial Dependence Plots with categorical values +================================================= + +Sigurd Carlsen Feb 2019 +Holger Nahrstaedt 2020 + +.. currentmodule:: skopt + +This notebook serves to showcase the new features that are being added to +the scikit-optimize toolbox. +""" +print(__doc__) +import sys +from skopt.plots import plot_objective +from skopt import forest_minimize +import numpy as np +np.random.seed(123) +import matplotlib.pyplot as plt +import numpy as np +from sklearn.datasets import load_breast_cancer +from sklearn.tree import DecisionTreeClassifier +from sklearn.model_selection import cross_val_score +from skopt.space import Integer, Categorical +from skopt import plots, gp_minimize +from skopt.plots import plot_objective + +############################################################################# +# objective function +# ================== +# Here we define a function that we evaluate. 
+
+def objective(params):
+    clf = DecisionTreeClassifier(
+        **{dim.name: val for dim, val in
+           zip(SPACE, params) if dim.name != 'dummy'})
+    return -np.mean(cross_val_score(clf, *load_breast_cancer(True)))
+
+#############################################################################
+
+SPACE = [
+    Integer(1, 20, name='max_depth'),
+    Integer(2, 100, name='min_samples_split'),
+    Integer(5, 30, name='min_samples_leaf'),
+    Integer(1, 30, name='max_features'),
+    Categorical(list('abc'), name='dummy'),
+    Categorical(['gini', 'entropy'], name='criterion'),
+    Categorical(list('def'), name='dummy'),
+]
+
+#############################################################################
+
+result = gp_minimize(objective, SPACE, n_calls=20)
+
+#############################################################################
+# plot_objective
+# --------------
+# Plot objective now supports optional use of partial dependence as well as
+# different methods of defining parameter values for dependency plots.
+#
+# Here we see an example of using partial dependence. Even when setting
+# n_points all the way down to 10 from the default of 40, this method is
+# still very slow. This is because partial dependence calculates 250 extra
+# predictions for each point on the plots.
+
+_ = plot_objective(result, n_points=10)
+
+#############################################################################
+# Here we plot without partial dependence. We see that it is a lot faster.
+# Also the values for the other parameters are set to the default "result",
+# which is the parameter set of the best observed value so far.
+
+_ = plot_objective(result, sample_source='result', n_points=10)
+
+#############################################################################
+#
+# Here we try with setting the other parameters to something other than
+# "result". When dealing with categorical dimensions we can't use
+# 'expected_minimum'. Therefore we try with "expected_minimum_random"
+# which is a naive way of finding the minimum of the surrogate by only
+# using random sampling. `n_minimum_search` sets the number of random
+# samples that are used to find the minimum.
+
+_ = plot_objective(result, n_points=10, sample_source='expected_minimum_random',
+                   minimum='expected_minimum_random', n_minimum_search=10000)
+
+#############################################################################
+# Lastly we can also define these parameters ourselves by
+# passing a list as the argument:
+
+_ = plot_objective(result, n_points=10, sample_source=[15, 4, 7, 15, 'b', 'entropy', 'e'],
+                   minimum=[15, 4, 7, 15, 'b', 'entropy', 'e'])
+
+
+
diff --git a/examples/partial-dependence-plot.py b/examples/partial-dependence-plot.py
index 68803e217..bc68adfc4 100644
--- a/examples/partial-dependence-plot.py
+++ b/examples/partial-dependence-plot.py
@@ -4,7 +4,7 @@
 ========================
 
 Sigurd Carlsen Feb 2019
-Reformatted by Holger Nahrstaedt 2020
+Holger Nahrstaedt 2020
 
 ..
currentmodule:: skopt
 
@@ -40,7 +40,8 @@ def funny_func(x):
 
 bounds = [(-1, 1.), ] * 3
 n_calls = 150
 
-result = forest_minimize(funny_func, bounds, n_calls=n_calls, base_estimator="ET",
+result = forest_minimize(funny_func, bounds, n_calls=n_calls,
+                         base_estimator="ET",
                          random_state=4)
 
 #############################################################################
@@ -52,13 +53,20 @@ def funny_func(x):
 
 _ = plot_objective(result, n_points=10)
 
+#############################################################################
+# It is possible to change the location of the red dot, which normally shows
+# the position of the found minimum. We can set it to 'expected_minimum',
+# which is the parameter set that minimizes the surrogate function,
+# obtained by a minimum search method.
+
+_ = plot_objective(result, n_points=10, minimum='expected_minimum')
 
 #############################################################################
 # Here we plot without partial dependence. We see that it is a lot faster.
 # Also the values for the other parameters are set to the default "result"
 # which is the parameter set of the best observed value so far. In the case
 # of funny_func this is close to 0 for all parameters.
 
-_ = plot_objective(result, samples='result', n_points=10)
+_ = plot_objective(result, sample_source='result', n_points=10)
 
 #############################################################################
 # Here we try with setting the `minimum` parameters to something other than
 # "result". First we try with "expected_minimum" which is the set of
 # parameters that gives the minimum value of the surrogate function,
 # using scipy's minimum search method.
 
-_ = plot_objective(result, n_points=10,
+_ = plot_objective(result, n_points=10, sample_source='expected_minimum',
                    minimum='expected_minimum')
 
 #############################################################################
 # "expected_minimum_random" is a naive way of finding the minimum of the
 # surrogate by only using random sampling:
 
-_ = plot_objective(result, n_points=10,
+_ = plot_objective(result, n_points=10, sample_source='expected_minimum_random',
                    minimum='expected_minimum_random')
 
 #############################################################################
-# Lastly we can also define these parameters ourselfs by parsing a list
+# Lastly we can also define these parameters ourselves by passing a list
 # as the minimum argument:
 
-_ = plot_objective(result, n_points=10, minimum=[1, -0.5, 0.5])
+_ = plot_objective(result, n_points=10, sample_source=[1, -0.5, 0.5],
+                   minimum=[1, -0.5, 0.5])
 
 #############################################################################
-# We can also specify how many intial samples are used for the two different
+# We can also specify how many initial samples are used for the two different
 # "expected_minimum" methods. We set it to a low value in the next examples
 # to showcase how it affects the minimum for the two methods.
-_ = plot_objective(result, n_points=10,
+_ = plot_objective(result, n_points=10, sample_source='expected_minimum_random',
                    minimum='expected_minimum_random',
-                   expected_minimum_samples=10)
+                   n_minimum_search=10)
 
 #############################################################################
 
 _ = plot_objective(result, n_points=10,
-                   minimum='expected_minimum', expected_minimum_samples=1)
+                   minimum='expected_minimum', n_minimum_search=1)
diff --git a/skopt/__init__.py b/skopt/__init__.py
index c0ae1e311..c10252551 100644
--- a/skopt/__init__.py
+++ b/skopt/__init__.py
@@ -53,6 +53,7 @@
     from .space import Space
     from .utils import dump
     from .utils import expected_minimum
+    from .utils import expected_minimum_random_sampling
     from .utils import load
     __all__ = (
         "acquisition",
diff --git a/skopt/plots.py b/skopt/plots.py
index d2ae937fb..0251355cc 100644
--- a/skopt/plots.py
+++ b/skopt/plots.py
@@ -5,7 +5,7 @@
 from functools import partial
 from scipy.optimize import OptimizeResult
 
-from skopt import expected_minimum
+from skopt import expected_minimum, expected_minimum_random_sampling
 from .space import Categorical
 
 # For plot tests, matplotlib must be set to headless mode early
@@ -350,9 +350,10 @@ def partial_dependence(space, model, i, j=None, sample_points=None,
     For Categorical variables, the `xi` (and `yi` for 2D) returned are
     the indices of the variable in `Dimension.categories`.
     """
-    # The idea is to step through one dimension and evaluating the model with
-    # that dimension fixed. (Or step through 2 dimensions when i and j are
-    # given.)
+    # The idea is to step through one dimension, evaluating the model with
+    # that dimension fixed and averaging either over random values or over
+    # the given ones in x_val in all other dimensions.
+    # (Or step through 2 dimensions when i and j are given.)
     # Categorical dimensions make this interesting, because they are one-
     # hot-encoded, so there is a one-to-many mapping of input dimensions
     # to transformed (model) dimensions.
@@ -404,8 +405,8 @@
 def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2,
-                   zscale='linear', dimensions=None, samples='random',
-                   minimum='result', expected_minimum_samples=None):
+                   zscale='linear', dimensions=None, sample_source='random',
+                   minimum='result', n_minimum_search=None):
     """Pairwise dependence plot of the objective function.
 
     The diagonal shows the partial dependence for dimension `i` with
@@ -433,8 +434,8 @@
         along each dimension.
 
     n_samples : int, default=250
-        Number of random samples to use for averaging the model function
-        at each of the `n_points`.
+        Number of samples to use for averaging the model function
+        at each of the `n_points` when `sample_source` is set to 'random'.
 
     size : float, default=2
         Height (in inches) of each facet.
@@ -448,11 +449,20 @@
         variables. `None` defaults to `space.dimensions[i].name`, or
         if also `None` to `['X_0', 'X_1', ..]`.
 
-    samples : str or list of floats, default='random'
+    sample_source : str or list of floats, default='random'
         Defines which sample generation to use for averaging the model
         function at each of the `n_points`.
-
-    Valid strings: 'random' - `n_random` samples will used
-                   'result' - Use best observed parameters
+
+    A real partial dependence plot is only generated, when `sample_source`
+    is set to 'random', but as this can be slow, it can be speed up by
+    setting it to the other parameter values, which always use
+    `n_samples=1`.
+
+    `sample_source` can also be a list of
+    floats, which is then used for averaging.
+
+    Valid strings: 'random' - `n_samples` random samples will be used
+                   'result' - Use only the best observed parameters
                    'expected_minimum' - Parameters that give the best
                        minimum, calculated using scipy's minimize method.
                        This method currently does not work with
                        categorical values.
                    'expected_minimum_random' - Parameters that give the
                        best minimum when using naive random sampling.
                        Works with categorical values.
 
     minimum : str or list of floats, default = 'result'
         Defines the values for the red points in the plots.
         Valid strings: 'result' - Use best observed parameters
                    'expected_minimum' - Parameters that give the best
                        minimum, calculated using scipy's minimize method.
                        This method currently does not work with
                        categorical values.
                    'expected_minimum_random' - Parameters that give the
                        best minimum when using naive random sampling.
                        Works with categorical values.
 
-    expected_minimum_samples : int, default = None
+    n_minimum_search : int, default = None
         Determines how many points should be evaluated
         to find the minimum when using 'expected_minimum' or
-        'expected_minimum_random'
+        'expected_minimum_random'. This parameter is used when
+        `sample_source` and/or `minimum` is set to
+        'expected_minimum' or 'expected_minimum_random'.
 
     Returns
     -------
     * `ax`: [`Axes`]:
         The matplotlib axes.
     """
     # calculating dependence. (Unless partial
     # dependence is to be used instead).
     space = result.space
-    x_vals = evaluate_min_params(result, minimum, expected_minimum_samples)
+    x_vals = _evaluate_min_params(result, minimum, n_minimum_search)
-    if samples == "random":
+    if sample_source == "random":
         x_eval = None
     else:
-        x_eval = evaluate_min_params(result, samples,
-                                     expected_minimum_samples)
+        x_eval = _evaluate_min_params(result, sample_source,
+                                      n_minimum_search)
     rvs_transformed = space.transform(space.rvs(n_samples=n_samples))
     samples, minimum, _ = _map_categories(space, result.x_iters, x_vals)
 
@@ -682,25 +694,10 @@
 def _cat_format(dimension, x, _):
     return str(dimension.categories[int(x)])
 
-def expected_min_random_sampling(model, space, n_samples=100000):
-    """Minimum search by doing naive random sampling, Returns the parameters
-    that gave the minimum function value"""
-    if n_samples > 100000:
-        n_samples = 100000
-    # sample points from search space
-    random_samples = space.rvs(n_samples=n_samples)
-
-    # make estimations with surrogate
-    y_random = model.predict(space.transform(random_samples))
-    index_best_objective = np.argmin(y_random)
-    min_x = random_samples[index_best_objective]
-
-    return min_x
-
-
-def evaluate_min_params(result, params='result',
-                        expected_minimum_samples=None,
+def _evaluate_min_params(result, params='result',
+                         n_minimum_search=None,
                          random_state=None):
+    """Returns the minimum based on `params`"""
     x_vals = None
     space = result.space
     if isinstance(params, str):
@@ -713,12 +710,12 @@
             raise ValueError('expected_minimum does not support any '
                              'categorical values')
         # Do a gradient based minimum search using scipy's own minimizer
-        if expected_minimum_samples:
+        if n_minimum_search:
             # If a value for
-            # expected_minimum_samples has been passed
+            # n_minimum_search has been passed
             x_vals, _ = expected_minimum(
                 result,
-                n_random_starts=expected_minimum_samples,
+                n_random_starts=n_minimum_search,
                 random_state=random_state)
         else:   # Use standard of 20 random starting points
             x_vals, _ = expected_minimum(result,
                                          n_random_starts=20,
                                          random_state=random_state)
     elif params == 'expected_minimum_random':
         # Do a minimum search by evaluating the function with
        # n_samples sample values
-        if expected_minimum_samples:
+        if n_minimum_search:
             # If a value for
-            # expected_minimum_samples has been passed
-            x_vals = expected_min_random_sampling(
-                result.models[-1],
-                space, n_samples=expected_minimum_samples)
+            # n_minimum_search has been passed
+            x_vals, _ = expected_minimum_random_sampling(
+                result,
+                n_random_starts=n_minimum_search,
+                random_state=random_state)
         else:
             # Use standard of 10^n_parameters. Note this
             # becomes very slow for many parameters
-            x_vals = expected_min_random_sampling(
-                result.models[-1], space,
-                n_samples=10 ** len(result.x))
+            x_vals, _ = expected_minimum_random_sampling(
+                result,
+                n_random_starts=10 ** len(result.x),
+                random_state=random_state)
     else:
         raise ValueError('Argument `eval_min_params` must be a valid '
                          'string (`result`)')
diff --git a/skopt/tests/test_plots.py b/skopt/tests/test_plots.py
index b2dd86ab9..a41b87cbf 100644
--- a/skopt/tests/test_plots.py
+++ b/skopt/tests/test_plots.py
@@ -9,8 +9,8 @@
 from skopt import plots, gp_minimize
 import matplotlib.pyplot as plt
 from skopt.benchmarks import bench3
-from skopt import expected_minimum
-from skopt.plots import evaluate_min_params
+from skopt import expected_minimum, expected_minimum_random_sampling
+from skopt.plots import _evaluate_min_params
 
 
 def save_axes(ax, filename):
@@ -40,16 +40,23 @@ def objective(params):
         return -np.mean(cross_val_score(clf, *load_breast_cancer(True)))
 
     res = gp_minimize(objective, SPACE, n_calls=10, random_state=3)
+
+    x_min, f_min = expected_minimum_random_sampling(res, random_state=1)
+    x_min2, f_min2 = expected_minimum(res, random_state=1)
+
+    assert x_min == x_min2
+    assert f_min == f_min2
+
     plots.plot_convergence(res)
     plots.plot_evaluations(res)
     plots.plot_objective(res)
     plots.plot_objective(res,
                          minimum='expected_minimum_random')
     plots.plot_objective(res,
-                         samples='expected_minimum_random',
-                         expected_minimum_samples=10)
+                         sample_source='expected_minimum_random',
+                         n_minimum_search=10000)
     plots.plot_objective(res,
-                         samples='result')
+                         sample_source='result')
     plots.plot_regret(res)
 
     # TODO: Compare plots to known good results?
@@ -80,10 +87,9 @@ def objective(params):
     plots.plot_objective(res,
                          minimum='expected_minimum')
     plots.plot_objective(res,
-                         samples='expected_minimum',
-                         expected_minimum_samples=10)
-    plots.plot_objective(res,
-                         samples='result')
+                         sample_source='expected_minimum',
+                         n_minimum_search=10)
+    plots.plot_objective(res, sample_source='result')
     plots.plot_regret(res)
 
     # TODO: Compare plots to known good results?
@@ -101,11 +107,16 @@ def test_evaluate_min_params():
                       random_state=1)
 
     x_min, f_min = expected_minimum(res, random_state=1)
-
-    assert evaluate_min_params(res, params='result') == res.x
-    assert evaluate_min_params(res, params=[1.]) == [1.]
-    assert evaluate_min_params(res, params='expected_minimum',
-                               random_state=1) == x_min
-    assert evaluate_min_params(res, params='expected_minimum',
-                               expected_minimum_samples=20,
-                               random_state=1) == x_min
+    x_min2, f_min2 = expected_minimum_random_sampling(res, n_random_starts=1000,
+                                                      random_state=1)
+
+    assert _evaluate_min_params(res, params='result') == res.x
+    assert _evaluate_min_params(res, params=[1.]) == [1.]
+ assert _evaluate_min_params(res, params='expected_minimum', + random_state=1) == x_min + assert _evaluate_min_params(res, params='expected_minimum', + n_minimum_search=20, + random_state=1) == x_min + assert _evaluate_min_params(res, params='expected_minimum_random', + n_minimum_search=1000, + random_state=1) == x_min2 diff --git a/skopt/tests/test_space.py b/skopt/tests/test_space.py index 4cfbd1088..a1a3697f1 100644 --- a/skopt/tests/test_space.py +++ b/skopt/tests/test_space.py @@ -655,3 +655,14 @@ def test_purely_categorical_space(): x = optimizer.ask() # before the fix this call raised an exception optimizer.tell(x, 1.) + + +@pytest.mark.fast_test +def test_partly_categorical_space(): + dims = Space([Categorical(['a', 'b', 'c']), Categorical(['A', 'B', 'C'])]) + assert dims.is_partly_categorical + dims = Space([Categorical(['a', 'b', 'c']), Integer(1, 2)]) + assert dims.is_partly_categorical + assert not dims.is_categorical + dims = Space([Integer(1, 2), Integer(1, 2)]) + assert not dims.is_partly_categorical diff --git a/skopt/tests/test_transformers.py b/skopt/tests/test_transformers.py index 48dd9f129..fc1d95b3a 100644 --- a/skopt/tests/test_transformers.py +++ b/skopt/tests/test_transformers.py @@ -40,6 +40,7 @@ def test_normalize_integer(): assert_raises(ValueError, transformer.inverse_transform, 1. + 1e-8) assert_raises(ValueError, transformer.transform, 0. - 1e-8) + @pytest.mark.fast_test def test_normalize(): transformer = Normalize(1, 20, is_int=False) diff --git a/skopt/tests/test_utils.py b/skopt/tests/test_utils.py index ddbdc8504..a9ea901d8 100644 --- a/skopt/tests/test_utils.py +++ b/skopt/tests/test_utils.py @@ -8,7 +8,7 @@ from skopt import gp_minimize, forest_minimize from skopt import load from skopt import dump -from skopt import expected_minimum +from skopt import expected_minimum, expected_minimum_random_sampling from skopt.benchmarks import bench1 from skopt.benchmarks import bench3 from skopt.learning import ExtraTreesRegressor @@ -105,6 +105,24 @@ def test_expected_minimum(): assert f_min == f_min2 +@pytest.mark.fast_test +def test_expected_minimum_random_sampling(): + res = gp_minimize(bench3, + [(-2.0, 2.0)], + x0=[0.], + noise=1e-8, + n_calls=8, + n_random_starts=3, + random_state=1) + + x_min, f_min = expected_minimum_random_sampling(res, random_state=1) + x_min2, f_min2 = expected_minimum_random_sampling(res, random_state=1) + + assert f_min <= res.fun # true since noise ~= 0.0 + assert x_min == x_min2 + assert f_min == f_min2 + + @pytest.mark.fast_test def test_dict_list_space_representation(): """ diff --git a/skopt/utils.py b/skopt/utils.py index 912e067f7..ed7cb021d 100644 --- a/skopt/utils.py +++ b/skopt/utils.py @@ -201,9 +201,12 @@ def check_x_in_space(x, space): def expected_minimum(res, n_random_starts=20, random_state=None): """ Compute the minimum over the predictions of the last surrogate model. + Uses `expected_minimum_random_sampling` with `n_random_starts`=100000, + when the space contains any categorical values. - Note that the returned minimum may not necessarily be an accurate - prediction of the minimum of the true objective function. + .. note:: + The returned minimum may not necessarily be an accurate + prediction of the minimum of the true objective function. Parameters ---------- @@ -225,6 +228,10 @@ def expected_minimum(res, n_random_starts=20, random_state=None): fun : float the surrogate function value at the minimum. 
""" + if res.space.is_partly_categorical: + return expected_minimum_random_sampling(res, n_random_starts=100000, + random_state=random_state) + def func(x): reg = res.models[-1] x = res.space.transform(x.reshape(1, -1)) @@ -247,6 +254,49 @@ def func(x): return [v for v in best_x], best_fun +def expected_minimum_random_sampling(res, n_random_starts=100000, random_state=None): + """Minimum search by doing naive random sampling, Returns the parameters + that gave the minimum function value. Can be used when the space + contains any categorical values. + + .. note:: + The returned minimum may not necessarily be an accurate + prediction of the minimum of the true objective function. + + Parameters + ---------- + res : `OptimizeResult`, scipy object + The optimization result returned by a `skopt` minimizer. + + n_random_starts : int, default=100000 + The number of random starts for the minimization of the surrogate + model. + + random_state : int, RandomState instance, or None (default) + Set random state to something other than None for reproducible + results. + + Returns + ------- + x : list] + location of the minimum. + fun : float + the surrogate function value at the minimum. + """ + + + # sample points from search space + random_samples = res.space.rvs(n_random_starts, random_state=random_state) + + # make estimations with surrogate + model = res.models[-1] + y_random = model.predict(res.space.transform(random_samples)) + index_best_objective = np.argmin(y_random) + min_x = random_samples[index_best_objective] + + return min_x, y_random[index_best_objective] + + def has_gradients(estimator): """ Check if an estimator's ``predict`` method provides gradients. @@ -546,7 +596,7 @@ def use_named_args(dimensions): Examples -------- >>> # Define the search-space dimensions. They must all have names! 
-    >>> from skopt.space.Space import Real
+    >>> from skopt.space import Real
     >>> from skopt import forest_minimize
     >>> from skopt.utils import use_named_args
     >>> dim1 = Real(name='foo', low=0.0, high=1.0)

From 53dcbeea335c769ebb1ec714701f87077b90e257 Mon Sep 17 00:00:00 2001
From: holgern
Date: Tue, 11 Feb 2020 14:08:47 +0100
Subject: [PATCH 033/265] Fix unit test and pep8

---
 skopt/plots.py            | 4 ++--
 skopt/space/space.py      | 2 +-
 skopt/tests/test_plots.py | 3 ++-
 skopt/utils.py            | 4 ++--
 4 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/skopt/plots.py b/skopt/plots.py
index 0251355cc..eb0b8c09a 100644
--- a/skopt/plots.py
+++ b/skopt/plots.py
@@ -695,8 +695,8 @@
 def _evaluate_min_params(result, params='result',
-                        n_minimum_search=None,
-                        random_state=None):
+                         n_minimum_search=None,
+                         random_state=None):
     """Returns the minimum based on `params`"""
     x_vals = None
     space = result.space
diff --git a/skopt/space/space.py b/skopt/space/space.py
index 100051310..9d91f8e8a 100644
--- a/skopt/space/space.py
+++ b/skopt/space/space.py
@@ -425,7 +425,7 @@ def __init__(self, low, high, prior="uniform", base=10, transform=None,
             self._rvs = _uniform_inclusive(0.0, 1.0)
             if self.prior == "uniform":
                 self.transformer = Pipeline(
-                    [Identity(), Normalize(low, high)])
+                    [Identity(), Normalize(low, high, is_int=True)])
             else:
 
                 self.transformer = Pipeline(
diff --git a/skopt/tests/test_plots.py b/skopt/tests/test_plots.py
index a41b87cbf..810f4a9bd 100644
--- a/skopt/tests/test_plots.py
+++ b/skopt/tests/test_plots.py
@@ -107,7 +107,8 @@ def test_evaluate_min_params():
                       random_state=1)
 
     x_min, f_min = expected_minimum(res, random_state=1)
-    x_min2, f_min2 = expected_minimum_random_sampling(res, n_random_starts=1000,
+    x_min2, f_min2 = expected_minimum_random_sampling(res,
+                                                      n_random_starts=1000,
                                                       random_state=1)
 
     assert _evaluate_min_params(res, params='result') == res.x
diff --git a/skopt/utils.py b/skopt/utils.py
index ed7cb021d..5cdf43c6e 100644
--- a/skopt/utils.py
+++ b/skopt/utils.py
@@ -254,7 +254,8 @@
-def expected_minimum_random_sampling(res, n_random_starts=100000, random_state=None):
+def expected_minimum_random_sampling(res, n_random_starts=100000,
+                                     random_state=None):
     """Minimum search by doing naive random sampling. Returns the parameters
     that gave the minimum function value. Can be used when the space
     contains any categorical values.
@@ -284,7 +285,6 @@
         the surrogate function value at the minimum.
""" - # sample points from search space random_samples = res.space.rvs(n_random_starts, random_state=random_state) From 55c31f381335c84f9eaf6d73e035b80988948b66 Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 11 Feb 2020 15:29:47 +0100 Subject: [PATCH 034/265] Improve plot doc and create new plots example directoy --- doc/modules/plots.rst | 40 +++++++++++++++---- doc/templates/index.html | 4 +- examples/plots/README.txt | 6 +++ ...artial-dependence-plot-with-categorical.py | 0 .../{ => plots}/partial-dependence-plot.py | 0 examples/{ => plots}/visualizing-results.py | 0 skopt/plots.py | 6 +-- 7 files changed, 43 insertions(+), 13 deletions(-) create mode 100644 examples/plots/README.txt rename examples/{ => plots}/partial-dependence-plot-with-categorical.py (100%) rename examples/{ => plots}/partial-dependence-plot.py (100%) rename examples/{ => plots}/visualizing-results.py (100%) diff --git a/doc/modules/plots.rst b/doc/modules/plots.rst index d33c3f65f..7762764e6 100644 --- a/doc/modules/plots.rst +++ b/doc/modules/plots.rst @@ -1,13 +1,39 @@ -.. currentmodule:: skopt.plots .. _plots: +============== Plotting tools ============== -Plotting functions. -* :class:`partial_dependence` -* :class:`plot_convergence` -* :class:`plot_evaluations` -* :class:`plot_objective` -* :class:`plot_regret` \ No newline at end of file +.. currentmodule:: skopt.plots + +Plotting functions can be used to visualize the optimization process. + +plot_convergence +================ +:class:`plot_convergence` plots one or several convergence traces. + +.. figure:: ../auto_examples/images/sphx_glr_hyperparameter-optimization_001.png + :target: ../auto_examples/hyperparameter-optimization.html + :align: center + +plot_evaluations +================ +:class:`plot_evaluations` visualize the order in which points where sampled. + +.. figure:: ../auto_examples/images/sphx_glr_visualizing-results_002.png + :target: ../auto_examples/plots/visualizing-results.htm + :align: center + +plot_objective +============== +:class:`plot_objective` creates pairwise dependence plot of the objective function. + +.. figure:: ../auto_examples/images/sphx_glr_partial-dependence-plot_001.png + :target: ../auto_examples/plots/partial-dependence-plot.html + :align: center + + +plot_regret +=========== +:class:`plot_regret` plot one or several cumulative regret traces. diff --git a/doc/templates/index.html b/doc/templates/index.html index 8244df212..6313c2ede 100644 --- a/doc/templates/index.html +++ b/doc/templates/index.html @@ -55,11 +55,11 @@
             Sequential model-based optimization
-            <a href="auto_examples/visualizing-results.html">Visualizing
-            optimization results</a>
+            <a href="auto_examples/plots/visualizing-results.html">Visualizing
+            optimization results</a>
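For orientation, here is a minimal end-to-end sketch of the four plotting
helpers documented in plots.rst above (an illustration only; the toy
objective below is made up for this sketch and is not part of the library):

import matplotlib.pyplot as plt
from skopt import gp_minimize
from skopt.plots import (plot_convergence, plot_evaluations,
                         plot_objective, plot_regret)

# Made-up 2-D toy objective, used only for this illustration.
res = gp_minimize(lambda p: (p[0] - 0.3) ** 2 + (p[1] + 0.1) ** 2,
                  [(-1.0, 1.0), (-1.0, 1.0)],
                  n_calls=15, random_state=0)

plot_convergence(res)    # best objective value per iteration
plot_evaluations(res)    # order in which points were sampled
plot_objective(res)      # pairwise (partial) dependence of the surrogate
plot_regret(res)         # cumulative regret trace
plt.show()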
diff --git a/examples/plots/README.txt b/examples/plots/README.txt
new file mode 100644
index 000000000..9acaa11b1
--- /dev/null
+++ b/examples/plots/README.txt
@@ -0,0 +1,6 @@
+.. _plots_examples:
+
+Plotting functions
+------------------
+
+Examples concerning the :mod:`skopt.plots` module.
diff --git a/examples/partial-dependence-plot-with-categorical.py b/examples/plots/partial-dependence-plot-with-categorical.py
similarity index 100%
rename from examples/partial-dependence-plot-with-categorical.py
rename to examples/plots/partial-dependence-plot-with-categorical.py
diff --git a/examples/partial-dependence-plot.py b/examples/plots/partial-dependence-plot.py
similarity index 100%
rename from examples/partial-dependence-plot.py
rename to examples/plots/partial-dependence-plot.py
diff --git a/examples/visualizing-results.py b/examples/plots/visualizing-results.py
similarity index 100%
rename from examples/visualizing-results.py
rename to examples/plots/visualizing-results.py
diff --git a/skopt/plots.py b/skopt/plots.py
index eb0b8c09a..c3f863c1b 100644
--- a/skopt/plots.py
+++ b/skopt/plots.py
@@ -453,10 +453,8 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2,
         Defines which sample generation to use for averaging the model
         function at each of the `n_points`.
 
-        A real partial dependence plot is only generated, when `sample_source`
-        is set to 'random', but as this can be slow, it can be speed up by
-        setting it to the other parameter values, which always use
-        `n_samples=1`.
+        A partial dependence plot is only generated when `sample_source`
+        is set to 'random' and `n_samples` is sufficient.
 
         `sample_source` can also be a list of
         floats, which is then used for averaging.

From ff8f98ece866ab64bf2b92c490383d449556d0ab Mon Sep 17 00:00:00 2001
From: holgern
Date: Tue, 11 Feb 2020 15:30:06 +0100
Subject: [PATCH 035/265] Improve plot examples

---
 ...artial-dependence-plot-with-categorical.py | 22 ++++++-----
 examples/plots/partial-dependence-plot.py     | 39 ++++++++++++-------
 2 files changed, 37 insertions(+), 24 deletions(-)

diff --git a/examples/plots/partial-dependence-plot-with-categorical.py b/examples/plots/partial-dependence-plot-with-categorical.py
index 0e2f71e33..733334823 100644
--- a/examples/plots/partial-dependence-plot-with-categorical.py
+++ b/examples/plots/partial-dependence-plot-with-categorical.py
@@ -8,8 +8,8 @@
 
 .. currentmodule:: skopt
 
-This notebook serves to showcase the new features that are being added to
-the scikit-optimize toolbox.
+Plot objective now supports optional use of partial dependence as well as
+different methods of defining parameter values for dependency plots.
""" print(__doc__) import sys @@ -38,7 +38,8 @@ def objective(params): return -np.mean(cross_val_score(clf, *load_breast_cancer(True))) ############################################################################# - +# Bayesian optimization +# ===================== SPACE = [ Integer(1, 20, name='max_depth'), Integer(2, 100, name='min_samples_split'), @@ -49,15 +50,11 @@ def objective(params): Categorical(list('def'), name='dummy'), ] -############################################################################# - result = gp_minimize(objective, SPACE, n_calls=20) ############################################################################# -# plot_objective -# -------------- -# Plot objective now supports optional use of partial dependence as well as -# different methods of defining parameter values for dependency plots +# Partial dependence plot +# ======================= # # Here we see an example of using partial dependence. Even when setting # n_points all the way down to 10 from the default of 40, this method is @@ -67,6 +64,8 @@ def objective(params): _ = plot_objective(result, n_points=10) ############################################################################# +# Plot without partial dependence +# =============================== # Here we plot without partial dependence. We see that it is a lot faster. # Also the values for the other parameters are set to the default "result" # which is the parameter set of the best observed value so far. In the case @@ -75,7 +74,8 @@ def objective(params): _ = plot_objective(result, sample_source='result', n_points=10) ############################################################################# -# +# Modify the shown minimum +# ======================== # Here we try with setting the other parameters to something other than # "result". When dealing with categorical dimensions we can't use # 'expected_minimum'. Therefore we try with "expected_minimum_random" @@ -87,6 +87,8 @@ def objective(params): minimum='expected_minimum_random', n_minimum_search=10000) ############################################################################# +# Set a minimum location +# ====================== # Lastly we can also define these parameters ourselfs by # parsing a list as the pars argument: diff --git a/examples/plots/partial-dependence-plot.py b/examples/plots/partial-dependence-plot.py index bc68adfc4..f85e8e152 100644 --- a/examples/plots/partial-dependence-plot.py +++ b/examples/plots/partial-dependence-plot.py @@ -8,8 +8,8 @@ .. currentmodule:: skopt -This notebook serves to showcase the new features that are being added to -the scikit-optimize toolbox. +Plot objective now supports optional use of partial dependence as well as +different methods of defining parameter values for dependency plots. 
""" print(__doc__) import sys @@ -21,8 +21,8 @@ ############################################################################# -# plot_objective -# ============== +# Objective function +# ================== # Plot objective now supports optional use of partial dependence as well as # different methods of defining parameter values for dependency plots @@ -35,7 +35,8 @@ def funny_func(x): ############################################################################# - +# Optimisation using decision trees +# ================================= # We run forest_minimize on the function bounds = [(-1, 1.), ] * 3 n_calls = 150 @@ -45,6 +46,8 @@ def funny_func(x): random_state=4) ############################################################################# +# Partial dependence plot +# ======================= # Here we see an example of using partial dependence. Even when setting # n_points all the way down to 10 from the default of 40, this method is # still very slow. This is because partial dependence calculates 250 extra @@ -61,6 +64,8 @@ def funny_func(x): _ = plot_objective(result, n_points=10, minimum='expected_minimum') ############################################################################# +# Plot without partial dependence +# =============================== # Here we plot without partial dependence. We see that it is a lot faster. # Also the values for the other parameters are set to the default "result" # which is the parameter set of the best observed value so far. In the case @@ -69,6 +74,8 @@ def funny_func(x): _ = plot_objective(result, sample_source='result', n_points=10) ############################################################################# +# Modify the shown minimum +# ======================== # Here we try with setting the `minimum` parameters to something other than # "result". First we try with "expected_minimum" which is the set of # parameters that gives the miniumum value of the surrogate function, @@ -84,13 +91,6 @@ def funny_func(x): _ = plot_objective(result, n_points=10, sample_source='expected_minimum_random', minimum='expected_minimum_random') -############################################################################# -# Lastly we can also define these parameters ourself by parsing a list -# as the minimum argument: - -_ = plot_objective(result, n_points=10, sample_source=[1, -0.5, 0.5], - minimum=[1, -0.5, 0.5]) - ############################################################################# # We can also specify how many initial samples are used for the two different # "expected_minimum" methods. 
We set it to a low value in the next examples @@ -102,5 +102,16 @@ def funny_func(x): ############################################################################# -_ = plot_objective(result, n_points=10, - minimum='expected_minimum', n_minimum_search=1) +_ = plot_objective(result, n_points=10, sample_source="expected_minimum", + minimum='expected_minimum', n_minimum_search=2) + +############################################################################# +# Set a minimum location +# ====================== +# Lastly we can also define these parameters ourself by parsing a list +# as the minimum argument: + +_ = plot_objective(result, n_points=10, sample_source=[1, -0.5, 0.5], + minimum=[1, -0.5, 0.5]) + + From 6b5b698dfe61f696939a1c25509b6796b12be503 Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 11 Feb 2020 15:43:38 +0100 Subject: [PATCH 036/265] Improve doc strings --- skopt/plots.py | 41 ++++++++++++++++++++++++----------------- skopt/utils.py | 11 ++++++----- 2 files changed, 30 insertions(+), 22 deletions(-) diff --git a/skopt/plots.py b/skopt/plots.py index c3f863c1b..ed96eca21 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -459,26 +459,33 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, `sample_source` can also be a list of floats, which is then used for averaging. - Valid strings: 'random' - `n_samples` random samples will used - 'result' - Use only the best observed parameters - 'expected_minimum' - Parameters that gives the best - minimum Calculated using scipy's minimize method. - This method currently does not work with - categorical values. - 'expected_minimum_random' - Parameters that gives the - best minimum when using naive random sampling. - Works with categorical values + Valid strings: + + - 'random' - `n_samples` random samples will used + + - 'result' - Use only the best observed parameters + + - 'expected_minimum' - Parameters that gives the best + minimum Calculated using scipy's minimize method. + This method currently does not work with categorical values. + + - 'expected_minimum_random' - Parameters that gives the + best minimum when using naive random sampling. + Works with categorical values. minimum : str or list of floats, default = 'result' Defines the values for the red points in the plots. - Valid strings: 'result' - Use best observed parameters - 'expected_minimum' - Parameters that gives the best - minimum Calculated using scipy's minimize method. - This method currently does not work with - categorical values. - 'expected_minimum_random' - Parameters that gives the - best minimum when using naive random sampling. - Works with categorical values + Valid strings: + + - 'result' - Use best observed parameters + + - 'expected_minimum' - Parameters that gives the best + minimum Calculated using scipy's minimize method. + This method currently does not work with categorical values. + + - 'expected_minimum_random' - Parameters that gives the + best minimum when using naive random sampling. + Works with categorical values n_minimum_search : int, default = None Determines how many points should be evaluated diff --git a/skopt/utils.py b/skopt/utils.py index 5cdf43c6e..5192a94dd 100644 --- a/skopt/utils.py +++ b/skopt/utils.py @@ -152,9 +152,10 @@ def load(filename, **kwargs): Reconstruct a skopt optimization result from a file persisted with skopt.dump. - Notice that the loaded optimization result can be missing - the objective function (`.specs['args']['func']`) if `skopt.dump` - was called with `store_objective=False`. + .. 
note:: + Notice that the loaded optimization result can be missing + the objective function (`.specs['args']['func']`) if `skopt.dump` + was called with `store_objective=False`. Parameters ---------- @@ -223,7 +224,7 @@ def expected_minimum(res, n_random_starts=20, random_state=None): Returns ------- - x : list] + x : list location of the minimum. fun : float the surrogate function value at the minimum. @@ -279,7 +280,7 @@ def expected_minimum_random_sampling(res, n_random_starts=100000, Returns ------- - x : list] + x : list location of the minimum. fun : float the surrogate function value at the minimum. From 959525fe608bf09a8fdc58715abe2dcbcf9b5223 Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 11 Feb 2020 17:00:54 +0100 Subject: [PATCH 037/265] Add unit test and fix doc strings --- skopt/tests/test_utils.py | 32 ++++++++++++++++++++++++++++++++ skopt/utils.py | 34 +++++++++++----------------------- 2 files changed, 43 insertions(+), 23 deletions(-) diff --git a/skopt/tests/test_utils.py b/skopt/tests/test_utils.py index 231679304..10ac9aa4b 100644 --- a/skopt/tests/test_utils.py +++ b/skopt/tests/test_utils.py @@ -3,6 +3,7 @@ from numpy.testing import assert_array_equal from numpy.testing import assert_equal +from numpy.testing import assert_raises import numpy as np from skopt import gp_minimize @@ -14,6 +15,7 @@ from skopt.learning import ExtraTreesRegressor from skopt import Optimizer from skopt import Space +from skopt.space import Dimension from skopt.utils import point_asdict from skopt.utils import point_aslist from skopt.utils import dimensions_aslist @@ -21,6 +23,8 @@ from skopt.utils import cook_estimator from skopt.utils import normalize_dimensions from skopt.utils import use_named_args +from skopt.utils import check_list_types +from skopt.utils import check_dimension_names from skopt.space import Real, Integer, Categorical @@ -241,3 +245,31 @@ def func(foo, bar, baz): # argument that is an unnamed numpy array. res = func(np.array(default_parameters)) assert (isinstance(res, float)) + + +@pytest.mark.fast_test +def test_check_dimension_names(): + # Define the search-space dimensions. They must all have names! + dim1 = Real(name='foo', low=0.0, high=1.0) + dim2 = Real(name='bar', low=0.0, high=1.0) + dim3 = Real(name='baz', low=0.0, high=1.0) + + # Gather the search-space dimensions in a list. + dimensions = [dim1, dim2, dim3] + check_dimension_names(dimensions) + dimensions = [dim1, dim2, dim3, Real(-1, 1)] + assert_raises(ValueError, check_dimension_names, dimensions) + + +@pytest.mark.fast_test +def test_check_list_types(): + # Define the search-space dimensions. They must all have names! + dim1 = Real(name='foo', low=0.0, high=1.0) + dim2 = Real(name='bar', low=0.0, high=1.0) + dim3 = Real(name='baz', low=0.0, high=1.0) + + # Gather the search-space dimensions in a list. + dimensions = [dim1, dim2, dim3] + check_list_types(dimensions, Dimension) + dimensions = [dim1, dim2, dim3, "test"] + assert_raises(ValueError, check_list_types, dimensions, Dimension) diff --git a/skopt/utils.py b/skopt/utils.py index 0462cf769..aa33e7cd0 100644 --- a/skopt/utils.py +++ b/skopt/utils.py @@ -529,26 +529,21 @@ def check_list_types(x, types): """ Check whether all elements of a list `x` are of the correct type(s) and raise a ValueError if they are not. - + Note that `types` can be either a single object-type or a tuple of object-types. - + + Raises `ValueError`, If one or more element in the list `x` is + not of the correct type(s). 
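For concreteness, a small sketch of the check described above (imports and
signature taken from the unit test added in this same patch; the example
values are illustrative):

    from skopt.space import Real, Dimension
    from skopt.utils import check_list_types

    dims = [Real(0.0, 1.0), Real(-1.0, 1.0)]
    check_list_types(dims, Dimension)             # all elements typed correctly
    check_list_types(dims + ["oops"], Dimension)  # raises ValueError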
+ Parameters ---------- - * `x` [list]: + x : list List of objects. - * `types` [object or list(object)]: + types : object or list(object) Either a single object-type or a tuple of object-types. - Exceptions - ---------- - * `ValueError`: - If one or more element in the list `x` is not of the correct type(s). - - Returns - ------- - * Nothing. """ # List of the elements in the list that are incorrectly typed. @@ -563,21 +558,14 @@ def check_list_types(x, types): def check_dimension_names(dimensions): """ - Check whether all dimensions have names. + Check whether all dimensions have names. Raises `ValueError`, + if one or more dimensions are unnamed. Parameters ---------- - * `dimensions` [list(Dimension)]: + dimensions : list(Dimension) List of Dimension-objects. - Exceptions - ---------- - * `ValueError`: - If one or more dimensions are unnamed. - - Returns - ------- - * Nothing. """ # List of the dimensions that have no names. @@ -611,7 +599,7 @@ def use_named_args(dimensions): Examples -------- >>> # Define the search-space dimensions. They must all have names! - >>> from skopt.space.Space import Real + >>> from skopt.space import Real >>> from skopt import forest_minimize >>> from skopt.utils import use_named_args >>> dim1 = Real(name='foo', low=0.0, high=1.0) From 5ea3ac5d6bc40c4ec6619afb83221174965ad934 Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 11 Feb 2020 17:26:08 +0100 Subject: [PATCH 038/265] Add unit test for issue #742 --- skopt/tests/test_utils.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/skopt/tests/test_utils.py b/skopt/tests/test_utils.py index 39476638a..e50e4bcfa 100644 --- a/skopt/tests/test_utils.py +++ b/skopt/tests/test_utils.py @@ -268,6 +268,21 @@ def func(foo, bar, baz): assert (isinstance(res, float)) +@pytest.mark.fast_test +def test_space_names_in_use_named_args(): + space = [Integer(250, 2000, name='n_estimators')] + + @use_named_args(space) + def objective(n_estimators): + print(f'score: {n_estimators}') + return n_estimators + + res = gp_minimize(objective, space, n_calls=10, random_state=0) + best_params = dict(zip((s.name for s in res.space), res.x)) + assert 'n_estimators' in best_params + assert res.space.dimensions[0].name == 'n_estimators' + + @pytest.mark.fast_test def test_check_dimension_names(): # Define the search-space dimensions. They must all have names! From b8b2a1bb05a8c3fd4c577b06306291b58e54d693 Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 11 Feb 2020 17:36:59 +0100 Subject: [PATCH 039/265] Add exception to fix issue #740 --- skopt/plots.py | 2 ++ skopt/tests/test_plots.py | 21 ++++++++++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/skopt/plots.py b/skopt/plots.py index ed96eca21..5023a5999 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -505,6 +505,8 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, # calculating dependence. (Unless partial # dependence is to be used instead). space = result.space + if space.n_dims == 1: + raise ValueError("plot_objective needs at least two variables. 
Found only one.") x_vals = _evaluate_min_params(result, minimum, n_minimum_search) if sample_source == "random": x_eval = None diff --git a/skopt/tests/test_plots.py b/skopt/tests/test_plots.py index 810f4a9bd..4a54c4718 100644 --- a/skopt/tests/test_plots.py +++ b/skopt/tests/test_plots.py @@ -4,13 +4,14 @@ from sklearn.datasets import load_breast_cancer from sklearn.tree import DecisionTreeClassifier from sklearn.model_selection import cross_val_score - +from numpy.testing import assert_raises from skopt.space import Integer, Categorical from skopt import plots, gp_minimize import matplotlib.pyplot as plt from skopt.benchmarks import bench3 from skopt import expected_minimum, expected_minimum_random_sampling from skopt.plots import _evaluate_min_params +from skopt import Optimizer def save_axes(ax, filename): @@ -121,3 +122,21 @@ def test_evaluate_min_params(): assert _evaluate_min_params(res, params='expected_minimum_random', n_minimum_search=1000, random_state=1) == x_min2 + + +def test_names_dimensions(): + # Define objective + def objective(x, noise_level=0.1): + return np.sin(5 * x[0]) * (1 - np.tanh(x[0] ** 2)) + np.random.randn() * noise_level + + # Initialize Optimizer + opt = Optimizer([(-2.0, 2.0)], n_initial_points=1) + + # Optimize + for i in range(2): + next_x = opt.ask() + f_val = objective(next_x) + res = opt.tell(next_x, f_val) + + # Plot results + assert_raises(ValueError, plots.plot_objective, res) From c909e4df64a3d3606f9063619fb4c8e5ba7f9fae Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 11 Feb 2020 17:42:47 +0100 Subject: [PATCH 040/265] Add unit tests for #717 --- skopt/tests/test_optimizer.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/skopt/tests/test_optimizer.py b/skopt/tests/test_optimizer.py index d19e6dafa..fab8766a8 100644 --- a/skopt/tests/test_optimizer.py +++ b/skopt/tests/test_optimizer.py @@ -328,3 +328,23 @@ def test_defaults_are_equivalent(): res_opt2 = opt.get_result() assert np.allclose(res_min.x_iters, res_opt2.x_iters) # , atol=1e-5) assert np.allclose(res_min.x, res_opt2.x) # , atol=1e-5) + + +@pytest.mark.fast_test +def test_dimensions_names(): + from skopt.space import Real, Categorical, Integer + # create search space and optimizer + space = [Real(0, 1, name='real'), + Categorical(['a', 'b', 'c'], name='cat'), + Integer(0, 1, name='int')] + opt = Optimizer(space, n_initial_points=1) + # result of the optimizer missing dimension names + result = opt.tell([(0.5, 'a', 0.5)], [3]) + names = [] + for d in result.space.dimensions: + names.append(d.name) + assert len(names) == 3 + assert "real" in names + assert "cat" in names + assert "int" in names + assert None not in names From 30e51947fa3daa015b00b751faabdcf1b61a2a82 Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 11 Feb 2020 17:44:47 +0100 Subject: [PATCH 041/265] Fix unit test --- skopt/tests/test_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/skopt/tests/test_utils.py b/skopt/tests/test_utils.py index e50e4bcfa..53e003d9d 100644 --- a/skopt/tests/test_utils.py +++ b/skopt/tests/test_utils.py @@ -274,7 +274,6 @@ def test_space_names_in_use_named_args(): @use_named_args(space) def objective(n_estimators): - print(f'score: {n_estimators}') return n_estimators res = gp_minimize(objective, space, n_calls=10, random_state=0) From 9ef1456bb92989c679626cafe4e22a24d43c6aeb Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 11 Feb 2020 21:09:25 +0100 Subject: [PATCH 042/265] Add conftest and doctest --- .travis.yml | 3 +- Makefile | 67 ++++++++++++++++ 
build_tools/travis/after_success.sh | 19 +++++ build_tools/travis/test_docs.sh | 6 ++ build_tools/travis/test_script.sh | 51 ++++++++++++ conftest.py | 83 +++++++++++++++++++ doc/Makefile | 118 ++++++++++++++++++++++++---- 7 files changed, 331 insertions(+), 16 deletions(-) create mode 100644 Makefile create mode 100644 build_tools/travis/after_success.sh create mode 100644 build_tools/travis/test_docs.sh create mode 100644 build_tools/travis/test_script.sh create mode 100644 conftest.py diff --git a/.travis.yml b/.travis.yml index 8f690c549..ad11b2b61 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,7 @@ cache: env: global: # Directory where tests are run from - - TEST_DIR=/tmp/sklearn + - TEST_DIR=/tmp/skopt - OMP_NUM_THREADS=4 - OPENBLAS_NUM_THREADS=4 @@ -57,6 +57,7 @@ script: then pytest --cov=skopt --durations=10; else pytest --durations=10; fi +- bash build_tools/travis/test_docs.sh after_success: - if [ ${COVERAGE} == "true" ]; then diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..13d6b3799 --- /dev/null +++ b/Makefile @@ -0,0 +1,67 @@ +# simple makefile to simplify repetitive build env management tasks under posix + +# caution: testing won't work on windows, see README + +PYTHON ?= python +CYTHON ?= cython +PYTEST ?= pytest +CTAGS ?= ctags + +# skip doctests on 32bit python +BITS := $(shell python -c 'import struct; print(8 * struct.calcsize("P"))') + +all: clean inplace test + +clean-ctags: + rm -f tags + +clean: clean-ctags + $(PYTHON) setup.py clean + rm -rf dist + # TODO: Remove in when all modules are removed. + $(PYTHON) sklearn/_build_utils/deprecated_modules.py + +in: inplace # just a shortcut +inplace: + $(PYTHON) setup.py build_ext -i + +test-code: in + $(PYTEST) --showlocals -v skopt --durations=20 +test-sphinxext: + $(PYTEST) --showlocals -v doc/sphinxext/ +test-doc: +ifeq ($(BITS),64) + $(PYTEST) $(shell find doc -name '*.rst' | sort) +endif +test-code-parallel: in + $(PYTEST) -n auto --showlocals -v skopt --durations=20 + +test-coverage: + rm -rf coverage .coverage + $(PYTEST) skopt --showlocals -v --cov=sklearn --cov-report=html:coverage +test-coverage-parallel: + rm -rf coverage .coverage .coverage.* + $(PYTEST) skopt -n auto --showlocals -v --cov=sklearn --cov-report=html:coverage + +test: test-code test-sphinxext test-doc + +trailing-spaces: + find skopt -name "*.py" -exec perl -pi -e 's/[ \t]*$$//' {} \; + +ctags: + # make tags for symbol based navigation in emacs and vim + # Install with: sudo apt-get install exuberant-ctags + $(CTAGS) --python-kinds=-i -R skopt + +doc: inplace + $(MAKE) -C doc html + +doc-noplot: inplace + $(MAKE) -C doc html-noplot + +code-analysis: + flake8 sklearn | grep -v __init__ | grep -v external + pylint -E -i y skopt/ -d E1103,E0611,E1101 + +flake8-diff: + ./build_tools/circle/linting.sh diff --git a/build_tools/travis/after_success.sh b/build_tools/travis/after_success.sh new file mode 100644 index 000000000..494f86b6e --- /dev/null +++ b/build_tools/travis/after_success.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# This script is meant to be called by the "after_success" step defined in +# .travis.yml. See https://docs.travis-ci.com/ for more details. 
+ +# License: 3-clause BSD + +set -e + +if [[ "$COVERAGE" == "true" ]]; then + # Need to run codecov from a git checkout, so we copy .coverage + # from TEST_DIR where pytest has been run + cp $TEST_DIR/.coverage $TRAVIS_BUILD_DIR + + # Ignore codecov failures as the codecov server is not + # very reliable but we don't want travis to report a failure + # in the github UI just because the coverage report failed to + # be published. + codecov --root $TRAVIS_BUILD_DIR || echo "codecov upload failed" +fi \ No newline at end of file diff --git a/build_tools/travis/test_docs.sh b/build_tools/travis/test_docs.sh new file mode 100644 index 000000000..d43b480fa --- /dev/null +++ b/build_tools/travis/test_docs.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +set -e +set -x + +make test-doc diff --git a/build_tools/travis/test_script.sh b/build_tools/travis/test_script.sh new file mode 100644 index 000000000..a14e50706 --- /dev/null +++ b/build_tools/travis/test_script.sh @@ -0,0 +1,51 @@ +#!/bin/bash +# This script is meant to be called by the "script" step defined in +# .travis.yml. See https://docs.travis-ci.com/ for more details. +# The behavior of the script is controlled by environment variabled defined +# in the .travis.yml in the top level folder of the project. + +# License: 3-clause BSD + +set -e + +python --version +python -c "import numpy; print('numpy %s' % numpy.__version__)" +python -c "import scipy; print('scipy %s' % scipy.__version__)" +python -c "\ +try: + import sklearn + print('sklearn %s' % sklearn.__version__) +except ImportError: + pass +" +python -c "import multiprocessing as mp; print('%d CPUs' % mp.cpu_count())" + +run_tests() { + TEST_CMD="pytest --showlocals --durations=20 --pyargs" + + # Get into a temp directory to run test from the installed scikit-learn and + # check if we do not leave artifacts + mkdir -p $TEST_DIR + # We need the setup.cfg for the pytest settings + cp setup.cfg $TEST_DIR + cd $TEST_DIR + + # Skip tests that require large downloads over the network to save bandwidth + # usage as travis workers are stateless and therefore traditional local + # disk caching does not work. + export SKOPT_SKIP_NETWORK_TESTS=1 + + if [[ "$COVERAGE" == "true" ]]; then + TEST_CMD="$TEST_CMD --cov sklearn" + fi + + if [[ -n "$CHECK_WARNINGS" ]]; then + TEST_CMD="$TEST_CMD -Werror::DeprecationWarning -Werror::FutureWarning" + fi + + set -x # print executed commands to the terminal + + $TEST_CMD skopt +} + +run_tests \ No newline at end of file diff --git a/conftest.py b/conftest.py new file mode 100644 index 000000000..e2149e22e --- /dev/null +++ b/conftest.py @@ -0,0 +1,83 @@ +# Even if empty this file is useful so that when running from the root folder +# ./sklearn is added to sys.path by pytest. See +# https://docs.pytest.org/en/latest/pythonpath.html for more details. For +# example, this allows to build extensions in place and run pytest +# doc/modules/clustering.rst and use sklearn from the local folder rather than +# the one from site-packages. + +import platform +import sys +from distutils.version import LooseVersion +import os + +import pytest +from _pytest.doctest import DoctestItem +from sklearn.utils import _IS_32BIT + + +PYTEST_MIN_VERSION = '3.3.0' + +if LooseVersion(pytest.__version__) < PYTEST_MIN_VERSION: + raise ImportError('Your version of pytest is too old, you should have ' + 'at least pytest >= {} installed.' 
+ .format(PYTEST_MIN_VERSION)) + + +def pytest_addoption(parser): + parser.addoption("--skip-network", action="store_true", default=False, + help="skip network tests") + + +def pytest_collection_modifyitems(config, items): + # FeatureHasher is not compatible with PyPy + if platform.python_implementation() == 'PyPy': + skip_marker = pytest.mark.skip( + reason='FeatureHasher is not compatible with PyPy') + for item in items: + if item.name.endswith(('_hash.FeatureHasher', + 'text.HashingVectorizer')): + item.add_marker(skip_marker) + + # Skip tests which require internet if the flag is provided + if config.getoption("--skip-network"): + skip_network = pytest.mark.skip( + reason="test requires internet connectivity") + for item in items: + if "network" in item.keywords: + item.add_marker(skip_network) + + # numpy changed the str/repr formatting of numpy arrays in 1.14. We want to + # run doctests only for numpy >= 1.14. + skip_doctests = False + try: + import numpy as np + if LooseVersion(np.__version__) < LooseVersion('1.14'): + reason = 'doctests are only run for numpy >= 1.14' + skip_doctests = True + elif _IS_32BIT: + reason = ('doctest are only run when the default numpy int is ' + '64 bits.') + skip_doctests = True + elif sys.platform.startswith("win32"): + reason = ("doctests are not run for Windows because numpy arrays " + "repr is inconsistent across platforms.") + skip_doctests = True + except ImportError: + pass + + if skip_doctests: + skip_marker = pytest.mark.skip(reason=reason) + + for item in items: + if isinstance(item, DoctestItem): + item.add_marker(skip_marker) + + +def pytest_configure(config): + import sys + sys._is_pytest_session = True + + +def pytest_unconfigure(config): + import sys + del sys._is_pytest_session diff --git a/doc/Makefile b/doc/Makefile index a0e8bf588..05ba06142 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -1,22 +1,110 @@ -# Minimal makefile for Sphinx documentation +# Makefile for Sphinx documentation # # You can set these variables from the command line. -SPHINXOPTS = -W --keep-going -SPHINXBUILD = sphinx-build -SPHINXPROJ = scikit-optimize -SOURCEDIR = source -BUILDDIR = build +SPHINXOPTS = -j auto +SPHINXBUILD ?= sphinx-build +PAPER = +BUILDDIR = _build +ifneq ($(EXAMPLES_PATTERN),) + EXAMPLES_PATTERN_OPTS := -D sphinx_gallery_conf.filename_pattern="$(EXAMPLES_PATTERN)" +endif + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -T -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS)\ + $(EXAMPLES_PATTERN_OPTS) . + + +.PHONY: help clean html dirhtml pickle json latex latexpdf changes linkcheck doctest optipng + +all: html-noplot -# Put it first so that "make" without argument is like "make help". 
help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + @echo "Please use \`make ' where is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + +clean: + -rm -rf $(BUILDDIR)/* + -rm -rf auto_examples/ + -rm -rf generated/* + -rm -rf modules/generated/ + +html: + # These two lines make the build a bit more lengthy, and the + # the embedding of images more robust + rm -rf $(BUILDDIR)/html/_images + #rm -rf _build/doctrees/ + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html" + +html-noplot: + $(SPHINXBUILD) -D plot_gallery=0 -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." + +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + make -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." + +# download-data: +# python -c "from sklearn.datasets._lfw import _check_fetch_lfw; _check_fetch_lfw()" -.PHONY: help Makefile +# Optimize PNG files. Needs OptiPNG. Change the -P argument to the number of +# cores you have available, so -P 64 if you have a real computer ;) +optipng: + find _build auto_examples */generated -name '*.png' -print0 \ + | xargs -0 -n 1 -P 4 optipng -o10 -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
-%: Makefile - mkdir ./source/notebooks - cp -r ../examples/* ./source/notebooks - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) +dist: html latexpdf + cp _build/latex/user_guide.pdf _build/html/_downloads/scikit-optimize-docs.pdf From 2b55337e0c84dc682e115017712ed7a75d0049ed Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 11 Feb 2020 21:12:25 +0100 Subject: [PATCH 043/265] Fix typo --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index ad11b2b61..8a2940959 100644 --- a/.travis.yml +++ b/.travis.yml @@ -57,7 +57,7 @@ script: then pytest --cov=skopt --durations=10; else pytest --durations=10; fi -- bash build_tools/travis/test_docs.sh + - bash build_tools/travis/test_docs.sh after_success: - if [ ${COVERAGE} == "true" ]; then From fcb39e8dc715f2c54e273f3ceea9e6192f2d4d16 Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 11 Feb 2020 21:27:54 +0100 Subject: [PATCH 044/265] Fix CI --- .travis.yml | 1 - build_tools/circle/build_test_pypy.sh | 48 +++++++++++++++++++++++++++ conftest.py | 2 +- setup.cfg | 20 +++++++++++ skopt/__init__.py | 4 +++ 5 files changed, 73 insertions(+), 2 deletions(-) create mode 100644 build_tools/circle/build_test_pypy.sh diff --git a/.travis.yml b/.travis.yml index 8a2940959..14e686f2a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -61,7 +61,6 @@ script: after_success: - if [ ${COVERAGE} == "true" ]; then - pip install codecov; codecov; fi diff --git a/build_tools/circle/build_test_pypy.sh b/build_tools/circle/build_test_pypy.sh new file mode 100644 index 000000000..2d7264af6 --- /dev/null +++ b/build_tools/circle/build_test_pypy.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash +set -x +set -e + +apt-get -yq update +apt-get -yq install libatlas-base-dev liblapack-dev gfortran ccache libopenblas-dev + +pip install virtualenv + +if command -v pypy3; then + virtualenv -p $(command -v pypy3) pypy-env +elif command -v pypy; then + virtualenv -p $(command -v pypy) pypy-env +fi + +source pypy-env/bin/activate + +python --version +which python + +pip install -U pip + +# pins versions to install wheel from https://antocuni.github.io/pypy-wheels/manylinux2010 +pip install --extra-index-url https://antocuni.github.io/pypy-wheels/manylinux2010 numpy==1.18.0 scipy==1.3.2 + +# Install Cython directly +pip install https://antocuni.github.io/pypy-wheels/ubuntu/Cython/Cython-0.29.14-py3-none-any.whl +pip install sphinx numpydoc docutils joblib pillow pytest + +ccache -M 512M +export CCACHE_COMPRESS=1 +export PATH=/usr/lib/ccache:$PATH +export LOKY_MAX_CPU_COUNT="2" +export OMP_NUM_THREADS="1" + +python setup.py build_ext --inplace -j 3 +pip install --no-build-isolation -e . 
+ +# Check that Python implementation is PyPy +python - << EOL +import platform +from skopt import IS_PYPY +assert IS_PYPY is True, "platform={}!=PyPy".format(platform.python_implementation()) +EOL + +python -m pytest skopt/ +python -m pytest doc/sphinxext/ +python -m pytest $(find doc -name '*.rst' | sort) \ No newline at end of file diff --git a/conftest.py b/conftest.py index e2149e22e..f2a991049 100644 --- a/conftest.py +++ b/conftest.py @@ -12,7 +12,7 @@ import pytest from _pytest.doctest import DoctestItem -from sklearn.utils import _IS_32BIT +from skopt import _IS_32BIT PYTEST_MIN_VERSION = '3.3.0' diff --git a/setup.cfg b/setup.cfg index 99ac4a263..5e9733634 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,3 +1,23 @@ +[aliases] +test = pytest + +[tool:pytest] +# disable-pytest-warnings should be removed once we rewrite tests +# using yield with parametrize +doctest_optionflags = NORMALIZE_WHITESPACE ELLIPSIS +addopts = + --ignore build_tools + --ignore benchmarks + --ignore doc + --ignore examples + --ignore maint_tools + --doctest-modules + --disable-pytest-warnings + -rs + +filterwarnings = + ignore:the matrix subclass:PendingDeprecationWarning + # used by our travis auto-deployment system # needs changing if scikit-optimize ever stops being # a pure python module diff --git a/skopt/__init__.py b/skopt/__init__.py index 51d06c6e6..8abbf080c 100644 --- a/skopt/__init__.py +++ b/skopt/__init__.py @@ -37,6 +37,8 @@ # We are not importing the rest of scikit-optimize during the build # process, as it may not be compiled yet else: + import platform + import struct from . import acquisition from . import benchmarks from . import callbacks @@ -74,3 +76,5 @@ "BayesSearchCV", "Space" ) + IS_PYPY = platform.python_implementation() == 'PyPy' + _IS_32BIT = 8 * struct.calcsize("P") == 32 From 01d5fa982cc24703097beb586560c6f0209b5cda Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 11 Feb 2020 21:40:15 +0100 Subject: [PATCH 045/265] Disable test doc --- .travis.yml | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/.travis.yml b/.travis.yml index 14e686f2a..e70a69a23 100644 --- a/.travis.yml +++ b/.travis.yml @@ -51,19 +51,10 @@ matrix: install: source build_tools/travis/install.sh - script: - - if [ ${COVERAGE} == "true" ]; - then pytest --cov=skopt --durations=10; else - pytest --durations=10; - fi - - bash build_tools/travis/test_docs.sh - -after_success: - - if [ ${COVERAGE} == "true" ]; then - codecov; - fi - + - bash build_tools/travis/test_script.sh + # - bash build_tools/travis/test_docs.sh +after_success: source build_tools/travis/after_success.sh deploy: provider: pypi user: __token__ From fa8eb7abac848570da759165900f4a4436fa60dd Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 11 Feb 2020 22:03:55 +0100 Subject: [PATCH 046/265] Try to fix doctest --- doc/conftest.py | 12 ++++++++++++ skopt/callbacks.py | 6 ++++-- skopt/utils.py | 4 ++-- 3 files changed, 18 insertions(+), 4 deletions(-) create mode 100644 doc/conftest.py diff --git a/doc/conftest.py b/doc/conftest.py new file mode 100644 index 000000000..a52e06264 --- /dev/null +++ b/doc/conftest.py @@ -0,0 +1,12 @@ +import os +from os.path import exists +from os.path import join +import warnings + +import numpy as np + +from skopt import IS_PYPY + + +def pytest_runtest_setup(item): + fname = item.fspath.strpath diff --git a/skopt/callbacks.py b/skopt/callbacks.py index 7d8e8b2ef..d1357ce93 100644 --- a/skopt/callbacks.py +++ b/skopt/callbacks.py @@ -260,9 +260,11 @@ class CheckpointSaver(object): 
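The `CheckpointSaver` doctest below is made self-contained. As a hedged
sketch of the wider pattern it supports (not part of the patch itself): a
checkpoint written by the callback can be reloaded with `skopt.load` and
used to seed a restarted run via the `x0`/`y0` arguments of `gp_minimize`:

    import skopt
    from skopt import load

    def obj_fun(x):
        return x[0] ** 2

    checkpoint_saver = skopt.callbacks.CheckpointSaver("./result.pkl")
    skopt.gp_minimize(obj_fun, [(-2, 2)], n_calls=10,
                      callback=[checkpoint_saver])

    # After an interruption, reload the stored result and continue from it.
    res = load("./result.pkl")
    skopt.gp_minimize(obj_fun, [(-2, 2)], n_calls=10,
                      x0=res.x_iters, y0=list(res.func_vals),
                      callback=[checkpoint_saver])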
Examples -------- >>> import skopt - >>> + >>> def obj_fun(x): + ... return x[0]**2 >>> checkpoint_callback = skopt.callbacks.CheckpointSaver("./result.pkl") - >>> skopt.gp_minimize(obj_fun, dims, callback=[checkpoint_callback]) + >>> skopt.gp_minimize(obj_fun, [(-2, 2)], n_calls=10, + ... callback=[checkpoint_callback]) Parameters ---------- diff --git a/skopt/utils.py b/skopt/utils.py index f30210ff2..7f8fb36ba 100644 --- a/skopt/utils.py +++ b/skopt/utils.py @@ -664,8 +664,8 @@ def use_named_args(dimensions): >>> # and use this function-decorator to specify the >>> # search-space dimensions. >>> @use_named_args(dimensions=dimensions) - >>> def my_objective_function(foo, bar, baz): - >>> return foo ** 2 + bar ** 4 + baz ** 8 + ... def my_objective_function(foo, bar, baz): + ... return foo ** 2 + bar ** 4 + baz ** 8 >>> >>> # Not the function is callable from the outside as >>> # `my_objective_function(x)` where `x` is a list of unnamed arguments, From d0746d6b70a9abe1e5814e195f571f3c6d3cda22 Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 11 Feb 2020 22:23:19 +0100 Subject: [PATCH 047/265] fix doc test --- skopt/callbacks.py | 2 +- skopt/utils.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/skopt/callbacks.py b/skopt/callbacks.py index d1357ce93..8c5e6d980 100644 --- a/skopt/callbacks.py +++ b/skopt/callbacks.py @@ -264,7 +264,7 @@ class CheckpointSaver(object): ... return x[0]**2 >>> checkpoint_callback = skopt.callbacks.CheckpointSaver("./result.pkl") >>> skopt.gp_minimize(obj_fun, [(-2, 2)], n_calls=10, - ... callback=[checkpoint_callback]) + ... callback=[checkpoint_callback]) # doctest: +SKIP Parameters ---------- diff --git a/skopt/utils.py b/skopt/utils.py index 7f8fb36ba..f60991585 100644 --- a/skopt/utils.py +++ b/skopt/utils.py @@ -428,7 +428,10 @@ def dimensions_aslist(search_space): >>> search_space = {'name1': Real(0,1), ... 'name2': Integer(2,4), 'name3': Real(-1,1)} >>> dimensions_aslist(search_space) - [Real(0,1), Integer(2,4), Real(-1,1)] + [Real(low=0, high=1, prior='uniform', transform='identity'), + Integer(low=2, high=4, prior='uniform', transform='identity'), + Real(low=-1, high=1, prior='uniform', transform='identity')] + """ params_space_list = [ search_space[k] for k in sorted(search_space.keys()) @@ -684,7 +687,9 @@ def use_named_args(dimensions): >>> >>> # Print the best-found results. >>> print("Best fitness:", result.fun) + Best fitness: 0.1948080835239698 >>> print("Best parameters:", result.x) + Best parameters: [0.44134853091052617, 0.06570954323368307, 0.17586123323419825] Parameters ---------- From c238a44c1a18e301d211fc522ac58eaad39b292e Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 11 Feb 2020 22:27:16 +0100 Subject: [PATCH 048/265] Fix doctest --- skopt/utils.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/skopt/utils.py b/skopt/utils.py index f60991585..7bcd414e0 100644 --- a/skopt/utils.py +++ b/skopt/utils.py @@ -427,10 +427,12 @@ def dimensions_aslist(search_space): >>> from skopt.utils import dimensions_aslist >>> search_space = {'name1': Real(0,1), ... 
'name2': Integer(2,4), 'name3': Real(-1,1)} - >>> dimensions_aslist(search_space) - [Real(low=0, high=1, prior='uniform', transform='identity'), - Integer(low=2, high=4, prior='uniform', transform='identity'), - Real(low=-1, high=1, prior='uniform', transform='identity')] + >>> dimensions_aslist(search_space)[0] + Real(low=0, high=1, prior='uniform', transform='identity') + >>> dimensions_aslist(search_space)[1] + Integer(low=2, high=4, prior='uniform', transform='identity') + >>> dimensions_aslist(search_space)[2] + Real(low=-1, high=1, prior='uniform', transform='identity') """ params_space_list = [ From 02ae1275954d243531f1e1fad5d826a78bb77a2e Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 11 Feb 2020 22:43:29 +0100 Subject: [PATCH 049/265] Use ordereddict in point_asdict --- skopt/utils.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/skopt/utils.py b/skopt/utils.py index 7bcd414e0..c634f07b5 100644 --- a/skopt/utils.py +++ b/skopt/utils.py @@ -8,7 +8,7 @@ from sklearn.ensemble import GradientBoostingRegressor from joblib import dump as dump_ from joblib import load as load_ - +from collections import OrderedDict from .learning import ExtraTreesRegressor from .learning import GaussianProcessRegressor from .learning import GradientBoostingQuantileRegressor @@ -19,6 +19,7 @@ from .space import Space, Categorical, Integer, Real, Dimension + __all__ = ( "load", "dump", @@ -475,9 +476,9 @@ def point_asdict(search_space, point_as_list): >>> point_asdict(search_space, point_as_list) {'name1': 0.66, 'name2': 3, 'name3': -0.15} """ - params_dict = { - k: v for k, v in zip(sorted(search_space.keys()), point_as_list) - } + params_dict = OrderedDict() + for k, v in zip(sorted(search_space.keys()), point_as_list): + params_dict[k] = v return params_dict From c119fbcbeecefb9e58959bf34e4fd45e787bcc37 Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 11 Feb 2020 22:45:12 +0100 Subject: [PATCH 050/265] Fix typo --- skopt/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skopt/utils.py b/skopt/utils.py index c634f07b5..30c3d386b 100644 --- a/skopt/utils.py +++ b/skopt/utils.py @@ -430,7 +430,7 @@ def dimensions_aslist(search_space): ... 'name2': Integer(2,4), 'name3': Real(-1,1)} >>> dimensions_aslist(search_space)[0] Real(low=0, high=1, prior='uniform', transform='identity') - >>> dimensions_aslist(search_space)[1] + >>> dimensions_aslist(search_space)[1] Integer(low=2, high=4, prior='uniform', transform='identity') >>> dimensions_aslist(search_space)[2] Real(low=-1, high=1, prior='uniform', transform='identity') From 4155b7b94e24ed3d3307121700c6a5b89554ece8 Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 11 Feb 2020 22:55:27 +0100 Subject: [PATCH 051/265] fix pep8 and doctest --- skopt/plots.py | 3 ++- skopt/tests/test_plots.py | 3 ++- skopt/utils.py | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/skopt/plots.py b/skopt/plots.py index 5023a5999..2879492d4 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -506,7 +506,8 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, # dependence is to be used instead). space = result.space if space.n_dims == 1: - raise ValueError("plot_objective needs at least two variables. Found only one.") + raise ValueError("plot_objective needs at least two" + "variables. 
Found only one.") x_vals = _evaluate_min_params(result, minimum, n_minimum_search) if sample_source == "random": x_eval = None diff --git a/skopt/tests/test_plots.py b/skopt/tests/test_plots.py index 4a54c4718..361e10935 100644 --- a/skopt/tests/test_plots.py +++ b/skopt/tests/test_plots.py @@ -127,7 +127,8 @@ def test_evaluate_min_params(): def test_names_dimensions(): # Define objective def objective(x, noise_level=0.1): - return np.sin(5 * x[0]) * (1 - np.tanh(x[0] ** 2)) + np.random.randn() * noise_level + return np.sin(5 * x[0]) * (1 - np.tanh(x[0] ** 2)) +\ + np.random.randn() * noise_level # Initialize Optimizer opt = Optimizer([(-2.0, 2.0)], n_initial_points=1) diff --git a/skopt/utils.py b/skopt/utils.py index 30c3d386b..83d5e27ab 100644 --- a/skopt/utils.py +++ b/skopt/utils.py @@ -462,7 +462,7 @@ def point_asdict(search_space, point_as_list): Returns ------- - params_dict : dict + params_dict : OrderedDict dictionary with parameter names as keys to which corresponding parameter values are assigned. @@ -474,7 +474,7 @@ def point_asdict(search_space, point_as_list): ... 'name2': Integer(2,4), 'name3': Real(-1,1)} >>> point_as_list = [0.66, 3, -0.15] >>> point_asdict(search_space, point_as_list) - {'name1': 0.66, 'name2': 3, 'name3': -0.15} + OrderedDict([('name1', 0.66), ('name2', 3), ('name3', -0.15)]) """ params_dict = OrderedDict() for k, v in zip(sorted(search_space.keys()), point_as_list): From df560cee5ac1a5771632bcfe8d1d2654af001628 Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 11 Feb 2020 23:47:12 +0100 Subject: [PATCH 052/265] Update zenodo --- .zenodo.json | 98 +++------------------------------------------------- 1 file changed, 4 insertions(+), 94 deletions(-) diff --git a/.zenodo.json b/.zenodo.json index 03bb01a54..7d869768a 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -4,7 +4,8 @@ "creators": [ { "affiliation": "Wild Tree Tech", - "name": "Head, Tim" + "name": "Head, Tim", + "orcid": "0000-0003-0931-3698" }, { "affiliation": "Google Brain", @@ -16,103 +17,12 @@ }, { "affiliation": "ULi\u00e8ge", - "name": "Louppe, Gilles" + "name": "Louppe, Gilles", + "orcid": "0000-0002-2082-3106" }, { "affiliation": "Saarland University", "name": "Shcherbatyi, Iaroslav" - }, - { - "name": "fcharras" - }, - { - "name": "Z\u00e9 Vin\u00edcius" - }, - { - "name": "cmmalone" - }, - { - "name": "Christopher Schr\u00f6der" - }, - { - "name": "nel215" - }, - { - "affiliation": "@yldio", - "name": "Nuno Campos" - }, - { - "name": "Todd Young" - }, - { - "affiliation": "Politecnico di Milano", - "name": "Stefano Cereda" - }, - { - "name": "Thomas Fan" - }, - { - "name": "rene-rex" - }, - { - "affiliation": "Columbia University", - "name": "Kejia (KJ) Shi" - }, - { - "affiliation": "Biomedical Informatics Department, Emory School of Medicine", - "name": "Justus Schwabedal" - }, - { - "name": "carlosdanielcsantos" - }, - { - "affiliation": "Hvass Laboratories", - "name": "Hvass-Labs" - }, - { - "affiliation": "Technical University of Munich", - "name": "Mikhail Pak" - }, - { - "name": "SoManyUsernamesTaken" - }, - { - "affiliation": "UC Berkeley", - "name": "Fred Callaway" - }, - { - "name": "Lo\u00efc Est\u00e8ve" - }, - { - "affiliation": "ENS de Cachan - Paris Saclay University", - "name": "Lilian Besson" - }, - { - "name": "Mehdi Cherti" - }, - { - "affiliation": "Paderborn University", - "name": "Karlson Pfannschmidt" - }, - { - "affiliation": "Toptal", - "name": "Fabian Linzberger" - }, - { - "affiliation": "@point8", - "name": "Christophe Cauet" - }, - { - "affiliation": 
"10clouds", - "name": "Anna Gut" - }, - { - "affiliation": "Columbia University Data Science Institute", - "name": "Andreas Mueller" - }, - { - "affiliation": "DFKI", - "name": "Alexander Fabisch" } ], "keywords": [ From aed96bd9f6b3c7276990cc0d551fc673b293c41a Mon Sep 17 00:00:00 2001 From: holgern Date: Wed, 12 Feb 2020 00:04:41 +0100 Subject: [PATCH 053/265] Add changelog --- CHANGELOG.md | 21 +++++++++++++++++++++ doc/whats_new/v0.7.2.rst | 12 ++++++++++-- doc/whats_new/v0.8.rst | 2 +- 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0caf27d91..b37b5bfd1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,25 @@ # Release history +## Version 0.7.2 + +## New features +* Add expected_minimum_random_sampling +* New plot examples +* Add more parameter to plot_objective +* Return ordereddict in point_asdict +* update_next() and get_results() added to Optimize + +## Bug fixes + +* Fix searchcv rank (issue #831) +* Fix random forest regressor (issue #766) +* Fix doc examples +* Fix integer normalize by using round() +* Fix random forest regressor (Add missing min_impurity_decrease) + +## Maintenance +* Fix license detection in github +* Add doctest to CI + ## Version 0.7.1 ### New features diff --git a/doc/whats_new/v0.7.2.rst b/doc/whats_new/v0.7.2.rst index 66ed2385a..c36b620b2 100644 --- a/doc/whats_new/v0.7.2.rst +++ b/doc/whats_new/v0.7.2.rst @@ -1,18 +1,26 @@ Version 0.7.2 ============= -**Not released** New features ------------ +* Add expected_minimum_random_sampling +* New plot examples +* Add more parameter to plot_objective +* Return ordereddict in point_asdict +* update_next() and get_results() added to Optimize Bug fixes --------- -* Fix searchcv rank (issue #830) +* Fix searchcv rank (issue #831) * Fix random forest regressor (issue #766) +* Fix doc examples +* Fix integer normalize by using round() +* Fix random forest regressor (Add missing min_impurity_decrease) Maintenance ----------- * Fix license detection in github +* Add doctest to CI diff --git a/doc/whats_new/v0.8.rst b/doc/whats_new/v0.8.rst index 0fe66437f..58729ea96 100644 --- a/doc/whats_new/v0.8.rst +++ b/doc/whats_new/v0.8.rst @@ -1,6 +1,6 @@ Version 0.8 =========== - +**not released** New features ------------ From e1e53f37e489e91e613730b3df475ce0dc865bb0 Mon Sep 17 00:00:00 2001 From: holgern Date: Wed, 12 Feb 2020 00:07:43 +0100 Subject: [PATCH 054/265] Add whats new links --- doc/templates/index.html | 1 + doc/whats_new.rst | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/templates/index.html b/doc/templates/index.html index 6313c2ede..6afb4c8c2 100644 --- a/doc/templates/index.html +++ b/doc/templates/index.html @@ -108,6 +108,7 @@

News

  • On-going development: What's new (Changelog)
+  • Feb 2020. scikit-optimize 0.7.2 (Changelog).
  • Feb 2020. scikit-optimize 0.7.1 (Changelog).
  • Jan 2020. scikit-optimize 0.7 (Changelog).
  • April 2018. scikit-optimize 0.6 (Changelog). diff --git a/doc/whats_new.rst b/doc/whats_new.rst index c862a9a53..fab8dd92b 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -7,7 +7,8 @@ Release notes for all scikit-optimize releases are linked in this this page. .. toctree:: :maxdepth: 1 - + Version 0.8 + Version 0.7.2 Version 0.7.1 Version 0.7 Version 0.6 From 66540a499226ca61f536e8f840c1aa1fce660276 Mon Sep 17 00:00:00 2001 From: holgern Date: Wed, 12 Feb 2020 06:35:11 +0100 Subject: [PATCH 055/265] Fix license in zenodo --- .zenodo.json | 2 +- doc/whats_new.rst | 1 + doc/whats_new/v0.7.3.rst | 11 +++++++++++ skopt/__init__.py | 2 +- 4 files changed, 14 insertions(+), 2 deletions(-) create mode 100644 doc/whats_new/v0.7.3.rst diff --git a/.zenodo.json b/.zenodo.json index 7d869768a..71ed3e4b0 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -34,7 +34,7 @@ "hyperparameter", "bayesian-optimization" ], - "license": "BSD-3-Clause", + "license": "bsd-3-clause", "related_identifiers": [ { "identifier": "https://scikit-optimize.github.io", diff --git a/doc/whats_new.rst b/doc/whats_new.rst index fab8dd92b..e969e6b8f 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -8,6 +8,7 @@ Release notes for all scikit-optimize releases are linked in this this page. .. toctree:: :maxdepth: 1 Version 0.8 + Version 0.7.3 Version 0.7.2 Version 0.7.1 Version 0.7 diff --git a/doc/whats_new/v0.7.3.rst b/doc/whats_new/v0.7.3.rst new file mode 100644 index 000000000..43826b3fc --- /dev/null +++ b/doc/whats_new/v0.7.3.rst @@ -0,0 +1,11 @@ +Version 0.7.3 +============= +**not released** +New features +------------ + +Bug fixes +--------- + +Maintenance +----------- diff --git a/skopt/__init__.py b/skopt/__init__.py index 8abbf080c..d8e995710 100644 --- a/skopt/__init__.py +++ b/skopt/__init__.py @@ -29,7 +29,7 @@ # Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer. 
# 'X.Y.dev0' is the canonical version of 'X.Y.dev' # -__version__ = "0.7.2" +__version__ = "0.7.3" if __SKOPT_SETUP__: import sys From fc514d966443e8dcc3cf67f034f02dc7921af2cb Mon Sep 17 00:00:00 2001 From: holgern Date: Wed, 12 Feb 2020 08:29:58 +0100 Subject: [PATCH 056/265] Fix pep8 --- examples/initial-sampling-method-integer.py | 2 +- skopt/optimizer/gp.py | 3 +- skopt/optimizer/optimizer.py | 10 +- skopt/samples/halton.py | 21 ++-- skopt/samples/lhs.py | 108 +++++++++++++------- skopt/samples/sobol.py | 43 ++++---- skopt/samples/utils.py | 13 ++- skopt/tests/test_samples.py | 4 +- 8 files changed, 125 insertions(+), 79 deletions(-) diff --git a/examples/initial-sampling-method-integer.py b/examples/initial-sampling-method-integer.py index 6e11edd35..4fb3e5264 100644 --- a/examples/initial-sampling-method-integer.py +++ b/examples/initial-sampling-method-integer.py @@ -175,4 +175,4 @@ def plot_branin(x, title): plt.grid(True) plt.ylabel("pdist") _ = ax.set_ylim(0, 6) -_ = ax.set_xticklabels(x_label, rotation=45, fontsize=8) \ No newline at end of file +_ = ax.set_xticklabels(x_label, rotation=45, fontsize=8) diff --git a/skopt/optimizer/gp.py b/skopt/optimizer/gp.py index 607413ff8..a57c67137 100644 --- a/skopt/optimizer/gp.py +++ b/skopt/optimizer/gp.py @@ -10,7 +10,8 @@ def gp_minimize(func, dimensions, base_estimator=None, - n_calls=100, n_random_starts=10, initial_point_generator="random", + n_calls=100, n_random_starts=10, + initial_point_generator="random", acq_func="gp_hedge", acq_optimizer="auto", x0=None, y0=None, random_state=None, verbose=False, callback=None, n_points=10000, n_restarts_optimizer=5, xi=0.01, kappa=1.96, diff --git a/skopt/optimizer/optimizer.py b/skopt/optimizer/optimizer.py index d28110ca9..d9cf7ed21 100644 --- a/skopt/optimizer/optimizer.py +++ b/skopt/optimizer/optimizer.py @@ -321,7 +321,8 @@ def __init__(self, dimensions, base_estimator="gp", "generator function") transformer = self.space.get_transformer() self.space.set_transformer("normalize") - self._initial_samples = self.space.inverse_transform(inv_initial_samples) + self._initial_samples = self.space.inverse_transform( + inv_initial_samples) self.space.set_transformer(transformer) # record categorical and non-categorical indices @@ -479,7 +480,8 @@ def _ask(self): return self.space.rvs(random_state=self.rng)[0] else: # The samples are evaluated starting form initial_samples[0] - return self._initial_samples[len(self._initial_samples) - self._n_initial_points] + return self._initial_samples[ + len(self._initial_samples) - self._n_initial_points] else: if not self.models: @@ -499,8 +501,8 @@ def _ask(self): def tell(self, x, y, fit=True): """Record an observation (or several) of the objective function. - Provide values of the objective function at points suggested by `ask()` - or other points. By default a new model will be fit to all + Provide values of the objective function at points suggested by + `ask()` or other points. By default a new model will be fit to all observations. The new model is used to suggest the next point at which to evaluate the objective. This point can be retrieved by calling `ask()`. diff --git a/skopt/samples/halton.py b/skopt/samples/halton.py index 61a5cebf8..8f0854377 100644 --- a/skopt/samples/halton.py +++ b/skopt/samples/halton.py @@ -8,12 +8,13 @@ class Halton(InitialPointGenerator): - """ In statistics, Halton sequences are sequences used to generate points in space - for numerical methods such as Monte Carlo simulations. 
Although these sequences - are deterministic, they are of low discrepancy, that is, appear to be random - for many purposes. They were first introduced in 1960 and are an example of - a quasi-random number sequence. They generalise the one-dimensional van der - Corput sequences. + """ In statistics, Halton sequences are sequences used to generate + points in space for numerical methods such as Monte Carlo simulations. + Although these sequences are deterministic, they are of low discrepancy, + that is, appear to be random + for many purposes. They were first introduced in 1960 and are an example + of a quasi-random number sequence. They generalise the one-dimensional + van der Corput sequences. For ``dim == 1`` the sequence falls back to Van Der Corput sequence. @@ -69,16 +70,16 @@ def generate(self, n_dim, n_samples, random_state=None): return np.transpose(out) - def _van_der_corput_samples(idx, number_base=2): """ Van der Corput samples. Create `Van Der Corput` low discrepancy sequence samples. A van der Corput sequence is an example of the simplest one-dimensional - low-discrepancy sequence over the unit interval; it was first described in 1935 - by the Dutch mathematician J. G. van der Corput. It is constructed by reversing - the base-n representation of the sequence of natural numbers (1, 2, 3, ...). + low-discrepancy sequence over the unit interval; it was first described in + 1935 by the Dutch mathematician J. G. van der Corput. It is constructed by + reversing the base-n representation of the sequence of natural numbers + (1, 2, 3, ...). In practice, use Halton sequence instead of Van Der Corput, as it is the same, but generalized to work in multiple dimensions. diff --git a/skopt/samples/lhs.py b/skopt/samples/lhs.py index 2103cf495..d0d56eba4 100644 --- a/skopt/samples/lhs.py +++ b/skopt/samples/lhs.py @@ -27,10 +27,14 @@ class Lhs(InitialPointGenerator): criterion : str or None, default=None When set to None, the LHS is not optimized - - `correlation` : optimized LHS by minimizing the correlation - - `maximin` : optimized LHS by maximizing the minimal pdist - - `ratio` : optimized LHS by minimizing the ratio `max(pdist) / min(pdist)` - - `ese` : optimized LHS using Enhanced Stochastic Evolutionary Alg. + - `correlation` : optimized LHS by minimizing the correlation + + - `maximin` : optimized LHS by maximizing the minimal pdist + + - `ratio` : optimized LHS by minimizing the ratio + `max(pdist) / min(pdist)` + + - `ese` : optimized LHS using Enhanced Stochastic Evolutionary Alg. 
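A brief usage sketch of the criteria listed above (class and argument names
as defined in this module; the concrete values are illustrative):

    from skopt.samples.lhs import Lhs

    lhs = Lhs(criterion="maximin", iterations=1000)
    samples = lhs.generate(n_dim=2, n_samples=20, random_state=42)

With `criterion=None` the plain (unoptimized) LHS is returned, so the
optimized variants only pay their extra cost when explicitly requested.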
iterations : int Defines the number of iterations for optimizing LHS @@ -93,12 +97,14 @@ def generate(self, n_dim, n_samples, random_state=None): return random_permute_matrix(h, random_state=random_state) else: internal_lhs = Lhs(lhs_type=self.lhs_type, criterion=None) - h_opt = internal_lhs.generate(n_dim, n_samples, random_state=random_state) + h_opt = internal_lhs.generate(n_dim, n_samples, + random_state=random_state) if self.criterion == "correlation": mincorr = np.inf for i in range(self.iterations): # Generate a random LHS - h = internal_lhs.generate(n_dim, n_samples, random_state=random_state) + h = internal_lhs.generate(n_dim, n_samples, + random_state=random_state) r = np.corrcoef(h.T) if np.max(np.abs(r[r != 1])) < mincorr: mincorr = np.max(np.abs(r - np.eye(r.shape[0]))) @@ -108,7 +114,8 @@ def generate(self, n_dim, n_samples, random_state=None): maxdist = 0 # Maximize the minimum distance between points for i in range(self.iterations): - h = internal_lhs.generate(n_dim, n_samples, random_state=random_state) + h = internal_lhs.generate(n_dim, n_samples, + random_state=random_state) d = spatial.distance.pdist(h, 'euclidean') if maxdist < np.min(d): maxdist = np.min(d) @@ -119,7 +126,8 @@ def generate(self, n_dim, n_samples, random_state=None): # Maximize the minimum distance between points for i in range(self.iterations): - h = internal_lhs.generate(n_dim, n_samples, random_state=random_state) + h = internal_lhs.generate(n_dim, n_samples, + random_state=random_state) p = spatial.distance.pdist(h, 'euclidean') ratio = np.max(p) / np.min(p) if minratio > ratio: @@ -127,7 +135,8 @@ def generate(self, n_dim, n_samples, random_state=None): h_opt = h.copy() elif self.criterion == "ese": - dm_init = internal_lhs.generate(n_dim, n_samples, random_state=random_state) + dm_init = internal_lhs.generate(n_dim, n_samples, + random_state=random_state) if self.ese_threshold_init <= 0.0: threshold = 0.005 * w2_discrepancy_fast(dm_init) @@ -137,14 +146,17 @@ def generate(self, n_dim, n_samples, random_state=None): num_exchanges = calc_num_candidate(n_samples) else: num_exchanges = self.ese_num_exchanges - if self.ese_max_inner <= 0: # maximum number of inner iterations + # maximum number of inner iterations + if self.ese_max_inner <= 0: max_inner = calc_max_inner(n_samples, n_dim) else: max_inner = self.ese_max_inner dm = dm_init.copy() # the current design - obj_func_best = w2_discrepancy_fast(dm) # the best value of obj.func. so far - obj_func_best_old = w2_discrepancy_fast(dm) # the old value of obj.func. + # the best value of obj.func. so far + obj_func_best = w2_discrepancy_fast(dm) + # the old value of obj.func. 
+ obj_func_best_old = w2_discrepancy_fast(dm) flag_explore = False # improved flag best_evol = [] # Keep track the best solution @@ -164,26 +176,36 @@ def generate(self, n_dim, n_samples, random_state=None): import itertools # Create pairs of all possible combination - pairs = list(itertools.combinations([_ for _ in range(n_samples)], 2)) - # Create random choices for the pair of perturbation, w/o replacement - rand_choices = rng.choice(len(pairs), num_exchanges, replace=False) + pairs = list(itertools.combinations( + [_ for _ in range(n_samples)], 2)) + # Create random choices for the pair of + # perturbation, w/o replacement + rand_choices = rng.choice(len(pairs), num_exchanges, + replace=False) # Initialize the search obj_func_current = np.inf dm_current = dm.copy() for i in rand_choices: - dm_try = dm.copy() # Always perturb from the design passed in argument - # Do column-wise operation in a given column 'num_dimension' - dm_try[pairs[i][0], num_dimension] = dm[pairs[i][1], num_dimension] - dm_try[pairs[i][1], num_dimension] = dm[pairs[i][0], num_dimension] + # Always perturb from the design passed + # in argument + dm_try = dm.copy() + # Do column-wise operation in a given + # column 'num_dimension' + dm_try[pairs[i][0], num_dimension] = dm[ + pairs[i][1], num_dimension] + dm_try[pairs[i][1], num_dimension] = dm[ + pairs[i][0], num_dimension] obj_func_try = w2_discrepancy_fast(dm_try) if obj_func_try < obj_func_current: - # Select the best trial from all the perturbation trials + # Select the best trial from all the + # perturbation trials obj_func_current = obj_func_try dm_current = dm_try.copy() obj_func_try = w2_discrepancy_fast(dm_current) # Check whether solution is acceptable - if (obj_func_try - obj_func) <= threshold * rng.rand(): + if (obj_func_try - obj_func) <=\ + threshold * rng.rand(): # Accept solution dm = dm_current.copy() n_accepted += 1 @@ -196,10 +218,12 @@ def generate(self, n_dim, n_samples, random_state=None): n_improved += 1 # Accept/Reject as Best Solution for convergence checking - if ((obj_func_best_old - obj_func_best) / obj_func_best) > 1e-6: + if ((obj_func_best_old - obj_func_best) + / obj_func_best) > 1e-6: # Improvement found obj_func_best_old = obj_func_best - flag_explore = False # Reset the explore flag after new best found + # Reset the explore flag after new best found + flag_explore = False flag_imp = True else: # Improvement not found @@ -207,31 +231,43 @@ def generate(self, n_dim, n_samples, random_state=None): # Improve vs. Explore Phase and Threshold Update if flag_imp: # Improve - # New best solution found, carry out improvement process - if (float(n_accepted / num_exchanges) > self.ese_improving_params[0]) & \ + # New best solution found, carry out + # improvement process + if (float(n_accepted / num_exchanges) > + self.ese_improving_params[0]) & \ (n_accepted > n_improved): - # Lots acceptance but not all of them is improvement, - # reduce threshold, make it harder to accept a trial + # Lots acceptance but not all of them + # is improvement, + # reduce threshold, make it harder to + # accept a trial threshold *= self.ese_improving_params[1] else: - # Few acceptance or all trials are improvement, increase threshold + # Few acceptance or all trials are improvement, + # increase threshold # make it easier to accept a trial threshold /= self.ese_improving_params[1] - - else: # Explore, No new best solution found during last iteration + # Explore, No new best solution found + # during last iteration + else: # Exploring process, warming up vs. 
cooling down - if n_accepted < self.ese_exploring_params[0] * num_exchanges: - # Reach below limit, increase threshold ("warming up") + if n_accepted < self.ese_exploring_params[0] *\ + num_exchanges: + # Reach below limit, increase threshold + # ("warming up") flag_explore = True - elif n_accepted > self.ese_exploring_params[1] * num_exchanges: - # Reach above limit, decrease threshold ("cooling down") + elif n_accepted > self.ese_exploring_params[1] *\ + num_exchanges: + # Reach above limit, decrease threshold + # ("cooling down") flag_explore = False if flag_explore: - # Ramp up exploration and below upper limit, increase threshold + # Ramp up exploration and below upper limit, + # increase threshold threshold /= self.ese_exploring_params[3] elif not flag_explore: - # Slow down exploration and above lower limit, decrease threshold + # Slow down exploration and above lower limit, + # decrease threshold threshold *= self.ese_exploring_params[2] return h_opt diff --git a/skopt/samples/sobol.py b/skopt/samples/sobol.py index 5a57e0e0c..bacf24278 100644 --- a/skopt/samples/sobol.py +++ b/skopt/samples/sobol.py @@ -168,21 +168,21 @@ def init(self, dim_num): # in Bratley and Fox, section 2. for j in range(m + 1, self.maxcol + 1): newv = self.v[i - 1, j - m - 1] - l = 1 + p2 = 1 for k in range(1, m + 1): - l *= 2 + p2 *= 2 if includ[k - 1]: newv = np.bitwise_xor( - int(newv), int(l * self.v[i - 1, j - k - 1])) + int(newv), int(p2 * self.v[i - 1, j - k - 1])) self.v[i - 1, j - 1] = newv # Multiply columns of V by appropriate power of 2. - l = 1 + p2 = 1 for j in range(self.maxcol - 1, 0, -1): - l *= 2 - self.v[0:dim_num, j - 1] = self.v[0:dim_num, j - 1] * l + p2 *= 2 + self.v[0:dim_num, j - 1] = self.v[0:dim_num, j - 1] * p2 # RECIPD is 1/(common denominator of the elements in V). - self.recipd = 1.0 / (2 * l) + self.recipd = 1.0 / (2 * p2) self.lastq = np.zeros(dim_num) def generate(self, n_dim, n_samples, random_state=None): @@ -230,7 +230,8 @@ def _sobol(self, dim_num, seed): value to be generated. On output, SEED has been set to the appropriate next value, usually simply SEED+1. If SEED is less than 0 on input, it is treated as though it were 0. - An input value of 0 requests the first (0-th) element of the sequence. + An input value of 0 requests the first (0-th) element of + the sequence. Returns ------- @@ -246,14 +247,14 @@ def _sobol(self, dim_num, seed): if seed < 0: seed = 0 - l = 1 + pos_lo0 = 1 if seed == 0: self.lastq = np.zeros(dim_num) elif seed == self.seed_save + 1: # Find the position of the right-hand zero in SEED. - l = _bit_lo0(seed) + pos_lo0 = _bit_lo0(seed) elif seed <= self.seed_save: @@ -261,29 +262,31 @@ def _sobol(self, dim_num, seed): self.lastq = np.zeros(dim_num) for seed_temp in range(int(self.seed_save), int(seed)): - l = _bit_lo0(seed_temp) + pos_lo0 = _bit_lo0(seed_temp) for i in range(1, dim_num + 1): self.lastq[i - 1] = np.bitwise_xor( - int(self.lastq[i - 1]), int(self.v[i - 1, l - 1])) + int(self.lastq[i - 1]), + int(self.v[i - 1, pos_lo0 - 1])) - l = _bit_lo0(seed) + pos_lo0 = _bit_lo0(seed) elif self.seed_save + 1 < seed: for seed_temp in range(int(self.seed_save + 1), int(seed)): - l = _bit_lo0(seed_temp) + pos_lo0 = _bit_lo0(seed_temp) for i in range(1, dim_num + 1): self.lastq[i - 1] = np.bitwise_xor( - int(self.lastq[i - 1]), int(self.v[i - 1, l - 1])) + int(self.lastq[i - 1]), + int(self.v[i - 1, pos_lo0 - 1])) - l = _bit_lo0(seed) + pos_lo0 = _bit_lo0(seed) # Check that the user is not calling too many times! 
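        # For intuition: `_bit_lo0` returns the 1-based position of the
        # lowest zero bit of its argument, so consecutive seeds follow a
        # Gray-code order and each update XORs in exactly one direction
        # vector. A minimal standalone sketch of such a helper
        # (illustrative only, not this module's exact implementation):
        #
        #     def bit_lo0(n):
        #         pos = 1
        #         while n & 1:
        #             n >>= 1
        #             pos += 1
        #         return pos  # bit_lo0(5) == 2, since 5 is binary 101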
- if self.maxcol < l: + if self.maxcol < pos_lo0: print('I4_SOBOL - Fatal error!') print(' Too many calls!') print(' MAXCOL = %d\n' % self.maxcol) - print(' L = %d\n' % l) + print(' L = %d\n' % pos_lo0) return # Calculate the new components of QUASI. @@ -291,11 +294,9 @@ def _sobol(self, dim_num, seed): for i in range(1, dim_num + 1): quasi[i - 1] = self.lastq[i - 1] * self.recipd self.lastq[i - 1] = np.bitwise_xor( - int(self.lastq[i - 1]), int(self.v[i - 1, l - 1])) + int(self.lastq[i - 1]), int(self.v[i - 1, pos_lo0 - 1])) self.seed_save = seed seed += 1 return [quasi, seed] - - diff --git a/skopt/samples/utils.py b/skopt/samples/utils.py index 14543ca32..f2a5ad966 100644 --- a/skopt/samples/utils.py +++ b/skopt/samples/utils.py @@ -41,11 +41,13 @@ def create_primes(threshold): def w2_discrepancy_fast(D): - """The vectorized version of wrap-around L2-discrepancy calculation, faster! + """The vectorized version of wrap-around L2-discrepancy + calculation, faster! The formula for the Wrap-Around L2-Discrepancy is taken from Eq.5 of (1) :math:`WD^2(D) = -(4/3)^K + 1/N^2 \Sigma_{i,j=1}^{N} \ Pi_{k=1}^K [3/2 - |x_k^1 - x_k^2| * (1 - |x_k^1 - x_k^2|)]` - The implementation below uses a vector operation of numpy array to avoid the + The implementation below uses a vector operation of numpy array to + avoid the nested loop in the more straightforward implementation Parameters @@ -62,7 +64,8 @@ def w2_discrepancy_fast(D): k = D.shape[1] # the number of dimension delta = [None] * k for i in range(k): - # loop over dimension to calculate the absolute difference between point + # loop over dimension to calculate the absolute difference + # between point # in a given dimension, note the vectorized operation delta[i] = np.abs(D[:, i] - np.reshape(D[:, i], (len(D[:, i]), 1))) @@ -149,8 +152,8 @@ def random_shift(dm, random_state=None): def calc_num_candidate(n): """Calculate the number of candidates from perturbing the current design - Recommended in the article is the maximum number of pair combination from a - given column divided by a factor of 5. + Recommended in the article is the maximum number of pair combination + from a given column divided by a factor of 5. It is also recommended that the number of candidates to be evaluated does not exceed 50 diff --git a/skopt/tests/test_samples.py b/skopt/tests/test_samples.py index 50e4af2ec..db1da7ebb 100644 --- a/skopt/tests/test_samples.py +++ b/skopt/tests/test_samples.py @@ -23,6 +23,7 @@ from skopt.samples import Hammersly, Halton from skopt.samples.lhs import Lhs + @pytest.mark.fast_test def test_lhs_type(): lhs = Lhs(lhs_type="classic") @@ -104,7 +105,8 @@ def test_halton(): h = Halton() x = h.generate(2, 4) - y = np.array([[0.125, 0.625, 0.375, 0.875], [0.4444, 0.7778, 0.2222, 0.5556]]).T + y = np.array([[0.125, 0.625, 0.375, 0.875], + [0.4444, 0.7778, 0.2222, 0.5556]]).T assert_array_almost_equal(x, y, 1e-3) samples = h.generate(2, 200) From a83c2f654c8091c04f3967143c3fc728d64fc8b0 Mon Sep 17 00:00:00 2001 From: holgern Date: Wed, 12 Feb 2020 11:40:32 +0100 Subject: [PATCH 057/265] Fix failing build --- skopt/searchcv.py | 3 ++- skopt/tests/test_searchcv.py | 16 +++++++++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/skopt/searchcv.py b/skopt/searchcv.py index 54c6faced..ad80c10ca 100644 --- a/skopt/searchcv.py +++ b/skopt/searchcv.py @@ -181,7 +181,8 @@ class BayesSearchCV(BaseSearchCV): ... 'degree': Integer(1,8), ... 'kernel': Categorical(['linear', 'poly', 'rbf']), ... }, - ... n_iter=32 + ... n_iter=32, + ... 
random_state=0 ... ) >>> >>> # executes bayesian optimization diff --git a/skopt/tests/test_searchcv.py b/skopt/tests/test_searchcv.py index 1a5e1d7d0..9b81fc8bf 100644 --- a/skopt/tests/test_searchcv.py +++ b/skopt/tests/test_searchcv.py @@ -41,9 +41,23 @@ def _fit_svc(n_jobs=1, n_points=1, cv=None): ) opt.fit(X, y) - assert opt.score(X, y) > 0.9 + opt2 = BayesSearchCV( + SVC(), + { + 'C': Real(1e-3, 1e+3, prior='log-uniform'), + 'gamma': Real(1e-3, 1e+1, prior='log-uniform'), + 'degree': Integer(1, 3), + }, + n_jobs=n_jobs, n_iter=11, n_points=n_points, cv=cv, + random_state=42, + ) + + opt2.fit(X, y) + + assert opt.score(X, y) == opt2.score(X, y) + def test_raise_errors(): From f8e37bc0e16b5ab35ba26c8df60a9b5f21dd8fc4 Mon Sep 17 00:00:00 2001 From: holgern Date: Wed, 12 Feb 2020 17:51:38 +0100 Subject: [PATCH 058/265] Improve documentation and fix pep8 --- doc/modules/classes.rst | 4 +- examples/samples/README.txt | 6 ++ .../initial-sampling-method-integer.py | 0 .../{ => samples}/initial-sampling-method.py | 0 skopt/samples/__init__.py | 2 +- skopt/samples/halton.py | 4 +- skopt/samples/hammersly.py | 9 ++- skopt/samples/lhs.py | 4 +- skopt/samples/sobol.py | 58 ++++++++++--------- skopt/samples/utils.py | 12 ++-- 10 files changed, 57 insertions(+), 42 deletions(-) create mode 100644 examples/samples/README.txt rename examples/{ => samples}/initial-sampling-method-integer.py (100%) rename examples/{ => samples}/initial-sampling-method.py (100%) diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index 5f28cc4f0..13e96ce33 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -221,7 +221,7 @@ details. utils.point_aslist utils.use_named_args -.. _space_ref: +.. _samples_ref: :mod:`skopt.samples`: Samples =============================== @@ -230,7 +230,7 @@ details. :no-members: :no-inherited-members: -**User guide:** See the :ref:`space` section for further details. +**User guide:** See the :ref:`sample` section for further details. .. currentmodule:: skopt diff --git a/examples/samples/README.txt b/examples/samples/README.txt new file mode 100644 index 000000000..41d26c2dd --- /dev/null +++ b/examples/samples/README.txt @@ -0,0 +1,6 @@ +.. _samples_examples: + +Initial sampling functions +-------------------------- + +Examples concerning the :mod:`skopt.samples` module. diff --git a/examples/initial-sampling-method-integer.py b/examples/samples/initial-sampling-method-integer.py similarity index 100% rename from examples/initial-sampling-method-integer.py rename to examples/samples/initial-sampling-method-integer.py diff --git a/examples/initial-sampling-method.py b/examples/samples/initial-sampling-method.py similarity index 100% rename from examples/initial-sampling-method.py rename to examples/samples/initial-sampling-method.py diff --git a/skopt/samples/__init__.py b/skopt/samples/__init__.py index ef27a9df3..c8c67693c 100644 --- a/skopt/samples/__init__.py +++ b/skopt/samples/__init__.py @@ -1,5 +1,5 @@ """ -Utilities to define somples +Utilities for generating initial sequences """ from .lhs import Lhs from .sobol import Sobol diff --git a/skopt/samples/halton.py b/skopt/samples/halton.py index 8f0854377..7cfc43edd 100644 --- a/skopt/samples/halton.py +++ b/skopt/samples/halton.py @@ -8,7 +8,8 @@ class Halton(InitialPointGenerator): - """ In statistics, Halton sequences are sequences used to generate + """Creates `Halton` sequence samples. 
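+    For intuition, each dimension of a Halton sequence applies the
+    radical-inverse (van der Corput) map in a distinct prime base.
+    A minimal standalone sketch of that map, using an illustrative
+    helper name that is not part of this module's API:
+
+        def radical_inverse(n, base=2):
+            inv, denom = 0.0, 1.0
+            while n > 0:
+                denom *= base
+                n, digit = divmod(n, base)
+                inv += digit / denom
+            return inv  # radical_inverse(3) == 0.75 in base 2
+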
+ In statistics, Halton sequences are sequences used to generate points in space for numerical methods such as Monte Carlo simulations. Although these sequences are deterministic, they are of low discrepancy, that is, appear to be random @@ -72,7 +73,6 @@ def generate(self, n_dim, n_samples, random_state=None): def _van_der_corput_samples(idx, number_base=2): """ - Van der Corput samples. Create `Van Der Corput` low discrepancy sequence samples. A van der Corput sequence is an example of the simplest one-dimensional diff --git a/skopt/samples/hammersly.py b/skopt/samples/hammersly.py index 2d131464d..96329e840 100644 --- a/skopt/samples/hammersly.py +++ b/skopt/samples/hammersly.py @@ -8,12 +8,15 @@ class Hammersly(InitialPointGenerator): - """The Hammersley set is equivalent to the Halton sequence, except for one + """Creates `Hammersley` sequence samples. + The Hammersley set is equivalent to the Halton sequence, except for one dimension is replaced with a regular grid. It is not recommended to generate a Hammersley sequence more than 10 dimension. For ``dim == 1`` the sequence falls back to Van Der Corput sequence. + References + ---------- T-T. Wong, W-S. Luk, and P-A. Heng, "Sampling with Hammersley and Halton Points," Journal of Graphics Tools, vol. 2, no. 2, 1997, pp. 9 - 24. @@ -38,10 +41,12 @@ def generate(self, n_dim, n_samples, random_state=None): n_dim : int The number of dimension n_samples : int - The order of the Hammersley sequence. Defines the number of samples. + The order of the Hammersley sequence. + Defines the number of samples. random_state : int, RandomState instance, or None (default) Set random state to something other than None for reproducible results. + Returns ------- np.array, shape=(n_dim, n_samples) diff --git a/skopt/samples/lhs.py b/skopt/samples/lhs.py index d0d56eba4..f0bb008cf 100644 --- a/skopt/samples/lhs.py +++ b/skopt/samples/lhs.py @@ -218,8 +218,8 @@ def generate(self, n_dim, n_samples, random_state=None): n_improved += 1 # Accept/Reject as Best Solution for convergence checking - if ((obj_func_best_old - obj_func_best) - / obj_func_best) > 1e-6: + if ((obj_func_best_old - obj_func_best) / + obj_func_best) > 1e-6: # Improvement found obj_func_best_old = obj_func_best # Reset the explore flag after new best found diff --git a/skopt/samples/sobol.py b/skopt/samples/sobol.py index bacf24278..242031d5b 100644 --- a/skopt/samples/sobol.py +++ b/skopt/samples/sobol.py @@ -40,33 +40,34 @@ class Sobol(InitialPointGenerator): randomize : bool, default=False When set to True, random shift is applied - Reference: - Antonov, Saleev, - USSR Computational Mathematics and Mathematical Physics, - Volume 19, 1980, pages 252 - 256. - - Paul Bratley, Bennett Fox, - Algorithm 659: - Implementing Sobol's Quasirandom Sequence Generator, - ACM Transactions on Mathematical Software, - Volume 14, Number 1, pages 88-100, 1988. - - Bennett Fox, - Algorithm 647: - Implementation and Relative Efficiency of Quasirandom - Sequence Generators, - ACM Transactions on Mathematical Software, - Volume 12, Number 4, pages 362-376, 1986. - - Ilya Sobol, - USSR Computational Mathematics and Mathematical Physics, - Volume 16, pages 236-242, 1977. - - Ilya Sobol, Levitan, - The Production of Points Uniformly Distributed in a Multidimensional - Cube (in Russian), - Preprint IPM Akad. Nauk SSSR, - Number 40, Moscow 1976. + References + ---------- + Antonov, Saleev, + USSR Computational Mathematics and Mathematical Physics, + Volume 19, 1980, pages 252 - 256. 
+
+    Paul Bratley, Bennett Fox,
+    Algorithm 659:
+    Implementing Sobol's Quasirandom Sequence Generator,
+    ACM Transactions on Mathematical Software,
+    Volume 14, Number 1, pages 88-100, 1988.
+
+    Bennett Fox,
+    Algorithm 647:
+    Implementation and Relative Efficiency of Quasirandom
+    Sequence Generators,
+    ACM Transactions on Mathematical Software,
+    Volume 12, Number 4, pages 362-376, 1986.
+
+    Ilya Sobol,
+    USSR Computational Mathematics and Mathematical Physics,
+    Volume 16, pages 236-242, 1977.
+
+    Ilya Sobol, Levitan,
+    The Production of Points Uniformly Distributed in a Multidimensional
+    Cube (in Russian),
+    Preprint IPM Akad. Nauk SSSR,
+    Number 40, Moscow 1976.
     """
     def __init__(self, min_skip=0, max_skip=1000, randomize=False):
@@ -156,7 +157,8 @@ def init(self, dim_num):
                 j //= 2
                 m += 1
 
-            # Expand this bit pattern to separate components of the logical array INCLUD.
+            # Expand this bit pattern to separate components
+            # of the logical array INCLUD.
             j = self.poly[i - 1]
             includ = np.zeros(m)
             for k in range(m, 0, -1):
diff --git a/skopt/samples/utils.py b/skopt/samples/utils.py
index f2a5ad966..28629ab72 100644
--- a/skopt/samples/utils.py
+++ b/skopt/samples/utils.py
@@ -124,12 +124,14 @@ def _bit_lo0(n):
 def random_shift(dm, random_state=None):
     """Random shifting of a vector
-    Randomization of the quasi-MC samples can be achieved in the easiest manner by
+    Randomization of the quasi-MC samples can be achieved
+    in the easiest manner by
     random shift (or the Cranley-Patterson rotation).
-    **Reference:**
-    (1) C. Lemieux, "Monte Carlo and Quasi-Monte Carlo Sampling," Springer
-    Series in Statistics 692, Springer Science+Business Media, New York,
-    2009
+    References
+    ----------
+    C. Lemieux, "Monte Carlo and Quasi-Monte Carlo Sampling," Springer
+    Series in Statistics 692, Springer Science+Business Media, New York,
+    2009
 
     Parameters
     ----------

From 14cf9af85201d065cefc1e85bb1e1802047b6ded Mon Sep 17 00:00:00 2001
From: holgern
Date: Thu, 13 Feb 2020 13:03:12 +0100
Subject: [PATCH 059/265] Remove name, index and _dimension_names

---
 skopt/space/space.py      | 131 ++++++--------------------------------
 skopt/tests/test_space.py |   4 +-
 skopt/utils.py            |   6 +-
 3 files changed, 23 insertions(+), 118 deletions(-)

diff --git a/skopt/space/space.py b/skopt/space/space.py
index 967b0728e..4546d9856 100644
--- a/skopt/space/space.py
+++ b/skopt/space/space.py
@@ -120,23 +120,6 @@ def check_dimension(dimension, transform=None):
 class Dimension(object):
     """Base class for search space dimensions."""
 
-    def __init__(self, name=None):
-        """
-        Common initialization for all types of search-spaces dimensions.
-
-        Call this at the start of __init__ for all sub-classes.
-
-        Parameters
-        ----------
-        * `name` [str or None]:
-            Name associated with the dimension.
-        """
-
-        # Name of this dimension.
-        self.name = name
-
-        # Initialize index before it has been set by search-space.
-        self.index = None
 
     prior = None
 
@@ -184,13 +167,7 @@ def transformed_bounds(self):
 
     @property
     def name(self):
-        if self._name is None:
-            # Dimension has no name, so make a default one.
-            n = 'X_{}'.format(self.index)
-        else:
-            # Dimension has a name, so use it.
-            n = self._name
-        return n
+        return self._name
 
     @name.setter
     def name(self, value):
@@ -199,21 +176,6 @@ def name(self, value):
         else:
             raise ValueError("Dimension's name must be either string or None.")
 
-    @property
-    def index(self):
-        """Index of the dimension in the search-space. This follows
-        the order in which the dimensions were added to the search-space.
- It should not be changed after it has first been set. - It is used to give unnamed dimensions a default name, - and it is also used e.g. in plotting functions for - both named and unnamed dimensions.""" - return self._index - - @index.setter - def index(self, value): - self._index = value - - def _uniform_inclusive(loc=0.0, scale=1.0): # like scipy.stats.distributions but inclusive of `high` # XXX scale + 1. might not actually be a float after scale if @@ -326,8 +288,8 @@ def __eq__(self, other): self.transform_ == other.transform_) def __repr__(self): - return "Real(low={}, high={}, prior='{}', transform='{}', name='{}')".format( - self.low, self.high, self.prior, self.transform_, self.name) + return "Real(low={}, high={}, prior='{}', transform='{}')".format( + self.low, self.high, self.prior, self.transform_) def inverse_transform(self, Xt): """Inverse transform samples from the warped space back into the @@ -612,7 +574,7 @@ def __repr__(self): else: prior = self.prior - return "Categorical(categories={}, prior={}, name='{}')".format(cats, prior, self.name) + return "Categorical(categories={}, prior={})".format(cats, prior) def rvs(self, n_samples=None, random_state=None): choices = self._rvs.rvs(size=n_samples, random_state=random_state) @@ -689,25 +651,6 @@ class Space(object): def __init__(self, dimensions): self.dimensions = [check_dimension(dim) for dim in dimensions] - # Set the index for all the dimensions. - # This is used e.g. in the plotting-functions so we don't - # have to return the index with __getitem__ below. - # It is important to set this before accessing the dimension-names, - # because the index is used for default names if the dimension - # is unnamed. - for i in range(self.n_dims): - self.dimensions[i].index = i - - # Names of all the dimensions in the search-space. - # This is also a @property further below, but it may be accessed - # many times e.g. in __getitem__ so we compute it only once here. - self._dimension_names = [dim.name for dim in self.dimensions] - - # Ensure all dimension names are unique. - if len(np.unique(self._dimension_names)) != len(self._dimension_names): - raise ValueError("All dimension names must be unique.") - - def __eq__(self, other): return all([a == b for a, b in zip(self.dimensions, other.dimensions)]) @@ -726,10 +669,7 @@ def dimension_names(self): """ Names of all the dimensions in the search-space. """ - - # NOTE: This may be called many times e.g. by __getitem__ - # so we use a pre-computed list instead of re-computing it every time. - return self._dimension_names + return [dim.name for dim in self.dimensions] @property def is_real(self): @@ -953,40 +893,36 @@ def __contains__(self, point): def __getitem__(self, dimension_names): """ Lookup and return the search-space dimension with the given name. - + This allows for dict-like lookup of dimensions, for example: `space['foo']` returns the dimension named 'foo' if it exists, - otherwise a `ValueError` exception is raised. - + otherwise `None` is returned. + It also allows for lookup of a list of dimension-names, for example: `space[['foo', 'bar']]` returns the two dimensions named 'foo' and 'bar' if they exist. - + Parameters ---------- - * `dimension_names` [str or list(str)]: + dimension_names : str or list(str) Name of a single search-space dimension (str). List of names for search-space dimensions (list(str)). - Raises - ------ - * `ValueError`: - If there is no search-space dimension with the given name. 
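+    For example, a hypothetical lookup on a small space (repr shown
+    for illustration only):
+
+    >>> from skopt.space import Space, Real
+    >>> space = Space([Real(0.0, 1.0, name='lr')])
+    >>> space['lr']  # doctest: +SKIP
+    Real(low=0.0, high=1.0, prior='uniform', transform='identity')
+    >>> space['no_such_name'] is None  # doctest: +SKIP
+    True
+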
-
     Returns
     -------
-    * `dims` [Dimension or list(Dimension)]:
+    dims : Dimension, list(Dimension), or None
         A single search-space dimension with the given name,
         or a list of search-space dimensions with the given names.
     """
+
     def _get(dimension_name):
         """Helper-function for getting a single dimension."""
-        # Get the index of the search-space dimension using its name.
-        idx = self._dimension_names.index(dimension_name)
-
-        # Get and return the dimension-object.
-        return self.dimensions[idx]
+        for dim in self.dimensions:
+            if dimension_name == dim.name:
+                return dim
+        return None
 
     if isinstance(dimension_names, str):
         # Get a single search-space dimension.
@@ -996,7 +932,8 @@ def _get(dimension_name):
         # Note that we do not check whether the names are really strings.
         dims = [_get(dimension_name=name) for name in dimension_names]
     else:
-        msg = "Dimension name should be either string or list of strings, but got {}."
+        msg = "Dimension name should be either string or " \
+              "list of strings, but got {}."
         raise ValueError(msg.format(type(dimension_names)))
 
     return dims
@@ -1040,35 +977,3 @@ def distance(self, point_a, point_b):
             distance += dim.distance(a, b)
 
         return distance
-
-
-    def point_to_dict(self, x):
-        """Convert a point in the search-space from a list
-        to a dict where the keys are the names of the dimensions.
-
-        NOTE: There is a related function in `utils.point_asdict()`
-        but it takes the search-space as a dict instead.
-
-        Example
-        -------
-        If `self.dimension_names = ['height', 'width', 'color']`
-        then `point_to_dict(x=[1, 2.0, 'red'])` returns the dict:
-        `{'height': 1, 'width': 2.0, 'color': 'red'}`
-
-        Parameters
-        ----------
-        * `x` [list]:
-            A point in the search-space.
-
-        Returns
-        -------
-        * `x_dict` [dict]
-            The point `x` in the search-space wrapped in a dict.
-            The keys are the names of the dimensions, and the
-            values are from `x`.
-        """
-
-        x_dict = {dim_name: value
-                  for dim_name, value in zip(self.dimension_names, x)}
-
-        return x_dict
diff --git a/skopt/tests/test_space.py b/skopt/tests/test_space.py
index 98b1c1259..3d575d146 100644
--- a/skopt/tests/test_space.py
+++ b/skopt/tests/test_space.py
@@ -175,11 +175,11 @@ def test_categorical_transform_binary():
 def test_categorical_repr():
     small_cat = Categorical([1, 2, 3, 4, 5])
     assert (small_cat.__repr__() ==
-            "Categorical(categories=(1, 2, 3, 4, 5), prior=None, name='X_None')")
+            "Categorical(categories=(1, 2, 3, 4, 5), prior=None)")
 
     big_cat = Categorical([1, 2, 3, 4, 5, 6, 7, 8])
     assert (big_cat.__repr__() ==
-            "Categorical(categories=(1, 2, 3, ..., 6, 7, 8), prior=None, name='X_None')")
+            "Categorical(categories=(1, 2, 3, ..., 6, 7, 8), prior=None')")
diff --git a/skopt/utils.py b/skopt/utils.py
index 756c702d2..54a49abd4 100644
--- a/skopt/utils.py
+++ b/skopt/utils.py
@@ -601,15 +601,15 @@ def get_samples_dimension(result, index):
 
     Parameters
     ----------
-    * `result` [`OptimizeResult`]
+    result : OptimizeResult
        The optimization results e.g. from calling `gp_minimize()`.
 
-    * `index` [int]:
+    index : int
        Index for a dimension in the search-space.
 
     Returns
     -------
-    * `samples`: [list of either int, float or string]:
+    samples : list of either int, float or string
        The optimization samples for the given dimension.
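
    A hypothetical example (`result` as returned by an optimizer run):

    >>> samples = get_samples_dimension(result, index=0)  # doctest: +SKIP
    >>> len(samples) == len(result.x_iters)  # doctest: +SKIP
    True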
""" From 4d93aa052c15c441e0ebaeddfdd5fa15080f066d Mon Sep 17 00:00:00 2001 From: holgern Date: Thu, 13 Feb 2020 13:07:12 +0100 Subject: [PATCH 060/265] Fix typo --- skopt/space/space.py | 1 + skopt/tests/test_space.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/skopt/space/space.py b/skopt/space/space.py index 4546d9856..ab89d2ef5 100644 --- a/skopt/space/space.py +++ b/skopt/space/space.py @@ -176,6 +176,7 @@ def name(self, value): else: raise ValueError("Dimension's name must be either string or None.") + def _uniform_inclusive(loc=0.0, scale=1.0): # like scipy.stats.distributions but inclusive of `high` # XXX scale + 1. might not actually be a float after scale if diff --git a/skopt/tests/test_space.py b/skopt/tests/test_space.py index 3d575d146..939321778 100644 --- a/skopt/tests/test_space.py +++ b/skopt/tests/test_space.py @@ -179,7 +179,7 @@ def test_categorical_repr(): big_cat = Categorical([1, 2, 3, 4, 5, 6, 7, 8]) assert (big_cat.__repr__() == - "Categorical(categories=(1, 2, 3, ..., 6, 7, 8), prior=None')") + 'Categorical(categories=(1, 2, 3, ..., 6, 7, 8), prior=None)') @pytest.mark.fast_test From f99e5ad681acd4b7008f4edac7c496d9576f7d8b Mon Sep 17 00:00:00 2001 From: holgern Date: Thu, 13 Feb 2020 13:15:44 +0100 Subject: [PATCH 061/265] Add removed regret_plot --- skopt/plots.py | 117 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 101 insertions(+), 16 deletions(-) diff --git a/skopt/plots.py b/skopt/plots.py index c513ba1ea..a17f7214e 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -53,7 +53,6 @@ def plot_convergence(*args, **kwargs): ax : `Axes` The matplotlib axes. """ - # <3 legacy python ax = kwargs.get("ax", None) true_minimum = kwargs.get("true_minimum", None) @@ -108,6 +107,91 @@ def plot_convergence(*args, **kwargs): return ax +def plot_regret(*args, **kwargs): + """Plot one or several cumulative regret traces. + Parameters + ---------- + args[i] : `OptimizeResult`, list of `OptimizeResult`, or tuple + The result(s) for which to plot the cumulative regret trace. + - if `OptimizeResult`, then draw the corresponding single trace; + - if list of `OptimizeResult`, then draw the corresponding cumulative + regret traces in transparency, along with the average cumulative + regret trace; + - if tuple, then `args[i][0]` should be a string label and `args[i][1]` + an `OptimizeResult` or a list of `OptimizeResult`. + ax : Axes`, optional + The matplotlib axes on which to draw the plot, or `None` to create + a new one. + true_minimum : float, optional + The true minimum value of the function, if known. + yscale : None or string, optional + The scale for the y-axis. + Returns + ------- + ax : `Axes` + The matplotlib axes. 
+ """ + # <3 legacy python + ax = kwargs.get("ax", None) + true_minimum = kwargs.get("true_minimum", None) + yscale = kwargs.get("yscale", None) + + if ax is None: + ax = plt.gca() + + ax.set_title("Cumulative regret plot") + ax.set_xlabel("Number of calls $n$") + ax.set_ylabel(r"$\sum_{i=0}^n(f(x_i) - optimum)$ after $n$ calls") + ax.grid() + + if yscale is not None: + ax.set_yscale(yscale) + + colors = cm.viridis(np.linspace(0.25, 1.0, len(args))) + + if true_minimum is None: + results = [] + for res in args: + if isinstance(res, tuple): + res = res[1] + + if isinstance(res, OptimizeResult): + results.append(res) + elif isinstance(res, list): + results.extend(res) + true_minimum = np.min([np.min(r.func_vals) for r in results]) + + for results, color in zip(args, colors): + if isinstance(results, tuple): + name, results = results + else: + name = None + + if isinstance(results, OptimizeResult): + n_calls = len(results.x_iters) + regrets = [np.sum(results.func_vals[:i] - true_minimum) + for i in range(1, n_calls + 1)] + ax.plot(range(1, n_calls + 1), regrets, c=color, + marker=".", markersize=12, lw=2, label=name) + + elif isinstance(results, list): + n_calls = len(results[0].x_iters) + iterations = range(1, n_calls + 1) + regrets = [[np.sum(r.func_vals[:i] - true_minimum) for i in + iterations] for r in results] + + for cr in regrets: + ax.plot(iterations, cr, c=color, alpha=0.2) + + ax.plot(iterations, np.mean(regrets, axis=0), c=color, + marker=".", markersize=12, lw=2, label=name) + + if name: + ax.legend(loc="best") + + return ax + + def _get_ylim_diagonal(ax): """Get the min / max of the ylim for all diagonal plots. This is used in _adjust_fig() so the ylim is the same @@ -507,24 +591,24 @@ def plot_evaluations(result, bins=20, dimension_names=None): Parameters ---------- - * `result` [`OptimizeResult`] + result : `OptimizeResult` The optimization results from calling e.g. `gp_minimize()`. - * `bins` [int, bins=20]: + bins : int, bins=20 Number of bins to use for histograms on the diagonal. - * `dimension_names` [list(str)]: + dimension_names : list(str) List of names for search-space dimensions to be used in the plot. You can omit `Categorical` dimensions here as they are not supported. If `None` then use all dimensions from the search-space. Returns ------- - * `fig`: [`Matplotlib.Figure`]: + fig : `Matplotlib.Figure` The object for the figure. For example, call `fig.savefig('plot.png')` to save the plot. - * `ax`: [`Matplotlib.Axes`]: + ax : `Matplotlib.Axes` A 2-d matrix of Axes-objects with the sub-plots. """ @@ -657,26 +741,26 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, Parameters ---------- - * `result` [`OptimizeResult`] + result : `OptimizeResult` The optimization results from calling e.g. `gp_minimize()`. levels : int, default=10 Number of levels to draw on the contour plot, passed directly to `plt.contour()`. - * `n_points` [int, default=40] + n_points : int, default=40 Number of points along each dimension where the partial dependence is evaluated when generating the contour-plots. - * `n_samples` [int, default=250] + n_samples : int, default=250 Number of points along each dimension where the partial dependence is evaluated when generating the contour-plots. - * `zscale` [str, default='linear'] + zscale : str, default='linear' Scale to use for the z-axis of the contour plots. Either 'log' or linear for all other choices. 
- * `dimension_names` [list(str), default=None]: + dimension_names : list(str), default=None List of names for search-space dimensions to be used in the plot. You can omit `Categorical` dimensions here as they are not supported. If `None` then use all dimensions from the search-space. @@ -728,11 +812,11 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, Returns ------- - * `fig`: [`Matplotlib.Figure`]: + fig : `Matplotlib.Figure` The object for the figure. For example, call `fig.savefig('plot.png')` to save the plot. - * `ax`: [`Matplotlib.Axes`]: + ax : `Matplotlib.Axes` A 2-d matrix of Axes-objects with the sub-plots. """ @@ -993,11 +1077,12 @@ def plot_histogram(result, dimension_name, bins=20, rotate_labels=0): Returns ------- - * `fig`: [`Matplotlib.Figure`]: + fig : `Matplotlib.Figure` The Matplotlib Figure-object. - For example, you can save the plot by calling `fig.savefig('file.png')` + For example, you can save the plot by calling + `fig.savefig('file.png')` - * `ax`: [`Matplotlib.Axes`]: + ax : `Matplotlib.Axes` The Matplotlib Axes-object. """ From 00548b9193e47b33f684949ac1f4038029becfd0 Mon Sep 17 00:00:00 2001 From: holgern Date: Thu, 13 Feb 2020 13:23:15 +0100 Subject: [PATCH 062/265] Fix doc strings --- skopt/plots.py | 82 +++++++++++++++++++++++++++----------------------- 1 file changed, 44 insertions(+), 38 deletions(-) diff --git a/skopt/plots.py b/skopt/plots.py index a17f7214e..0be00bfb7 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -109,23 +109,29 @@ def plot_convergence(*args, **kwargs): def plot_regret(*args, **kwargs): """Plot one or several cumulative regret traces. + Parameters ---------- args[i] : `OptimizeResult`, list of `OptimizeResult`, or tuple The result(s) for which to plot the cumulative regret trace. + - if `OptimizeResult`, then draw the corresponding single trace; - if list of `OptimizeResult`, then draw the corresponding cumulative regret traces in transparency, along with the average cumulative regret trace; - if tuple, then `args[i][0]` should be a string label and `args[i][1]` an `OptimizeResult` or a list of `OptimizeResult`. + ax : Axes`, optional The matplotlib axes on which to draw the plot, or `None` to create a new one. + true_minimum : float, optional The true minimum value of the function, if known. + yscale : None or string, optional The scale for the y-axis. + Returns ------- ax : `Axes` @@ -199,12 +205,12 @@ def _get_ylim_diagonal(ax): Parameters ---------- - * `ax` [`Matplotlib.Axes`]: + ax : `Matplotlib.Axes` 2-dimensional matrix with Matplotlib Axes objects. Returns ------- - * `ylim_diagonal` [list(int)] + ylim_diagonal : list(int) The common min and max ylim for the diagonal plots. """ @@ -236,20 +242,20 @@ def _adjust_fig(fig, ax, space, ylabel, dimensions): Parameters ---------- - * `fig` [`Matplotlib.Figure`]: + fig : `Matplotlib.Figure` Figure-object for the plots. - * `ax` [`Matplotlib.Axes`]: + ax : `Matplotlib.Axes` 2-dimensional matrix with Matplotlib Axes objects. - * `space` [`Space`]: + space : `Space` Search-space object. - * `ylabel` [`str`]: + ylabel : `str` String to be printed on the top-left diagonal plot e.g. 'Sample Count'. - * `dimensions` [`list(Dimension)`]: + dimensions : `list(Dimension)` List of `Dimension` objects used in the plots. Returns @@ -364,19 +370,19 @@ def _map_bins(bins, bounds, prior): Parameters ---------- - * `bins` [int] + bins : int Number of bins in the histogram. 
- * `bounds` [(int, int)] + bounds : (int, int) Tuple or list with lower- and upper-bounds for a search-space dimension. - * `prior` [str or None] + prior : str or None If 'log-uniform' then use log-scaling for the bins, otherwise use the original number of bins. Returns ------- - * `bins_mapped`: [int or np.array(int)]: + bins_mapped : int or np.array(int) Number of bins for a histogram if no mapping, or a log-scaled array of bin-points if mapping is needed. """ @@ -408,18 +414,18 @@ def partial_dependence_1D(model, dimension, samples, n_points=40): Parameters ---------- - * `model` + model Surrogate model for the objective function. - * `dimension` [Dimension] + dimension : Dimension The `Dimension`-object for which to calculate the partial dependence. - * `samples` [np.array, shape=(n_points, n_dims)] + samples : np.array, shape=(n_points, n_dims) Randomly sampled and transformed points to use when averaging the model function at each of the `n_points` when using partial dependence. - * `n_points` [int, default=40] + n_points : int, default=40 Number of points along each dimension where the partial dependence is evaluated. @@ -431,10 +437,10 @@ def partial_dependence_1D(model, dimension, samples, n_points=40): Returns ------- - * `xi`: [np.array]: + xi : np.array The points at which the partial dependence was evaluated. - * `yi`: [np.array]: + yi : np.array The average value of the modelled objective function at each point `xi`. """ @@ -491,34 +497,34 @@ def partial_dependence_2D(model, dimension1, dimension2, samples, n_points=40): Parameters ---------- - * `model` + model Surrogate model for the objective function. - * `dimension1` [Dimension] + dimension1 : Dimension The first `Dimension`-object for which to calculate the partial dependence. - * `dimension2` [Dimension] + dimension2 : Dimension The second `Dimension`-object for which to calculate the partial dependence. - * `samples` [np.array, shape=(n_points, n_dims)] + samples : np.array, shape=(n_points, n_dims) Randomly sampled and transformed points to use when averaging the model function at each of the `n_points`. - * `n_points` [int, default=40] + n_points : int, default=40 Number of points along each dimension where the partial dependence is evaluated. Returns ------- - * `xi`: [np.array, shape=n_points]: + xi : np.array, shape=n_points The points at which the partial dependence was evaluated. - * `yi`: [np.array, shape=n_points]: + yi : np.array, shape=n_points The points at which the partial dependence was evaluated. - * `zi`: [np.array, shape=(n_points, n_points)]: + zi : np.array, shape=(n_points, n_points) The average value of the objective function at each point `(xi, yi)`. """ @@ -947,37 +953,37 @@ def plot_objective_2D(result, dimension_name1, dimension_name2, Parameters ---------- - * `result` [`OptimizeResult`] + result : `OptimizeResult` The optimization results e.g. from calling `gp_minimize()`. - * `dimension_name1` [str]: + dimension_name1 : str Name of a dimension in the search-space. - * `dimension_name2` [str]: + dimension_name2 : str Name of a dimension in the search-space. - * `n_samples` [int, default=250] + n_samples : int, default=250 Number of random samples used for estimating the contour-plot of the objective function. - * `n_points` [int, default=40] + n_points : int, default=40 Number of points along each dimension where the partial dependence is evaluated when generating the contour-plots. - * `levels` [int, default=10] + levels : int, default=10 Number of levels to draw on the contour plot. 
- * `zscale` [str, default='linear'] + zscale : str, default='linear' Scale to use for the z axis of the contour plots. Either 'log' or linear for all other choices. Returns ------- - * `fig`: [`Matplotlib.Figure`]: + fig : `Matplotlib.Figure` The Matplotlib Figure-object. For example, you can save the plot by calling `fig.savefig('file.png')` - * `ax`: [`Matplotlib.Axes`]: + ax : `Matplotlib.Axes` The Matplotlib Figure-object. For example, you can save the plot by calling `fig.savefig('file.png')` """ @@ -1062,16 +1068,16 @@ def plot_histogram(result, dimension_name, bins=20, rotate_labels=0): Parameters ---------- - * `result` [`OptimizeResult`] + result : `OptimizeResult` The optimization results e.g. from calling `gp_minimize()`. - * `dimension_name` [str]: + dimension_name : str Name of a dimension in the search-space. - * `bins` [int, bins=20]: + bins : int, bins=20 Number of bins in the histogram. - * `rotate_labels` [int, rotate_labels=0]: + rotate_labels : int, rotate_labels=0 Degree to rotate category-names on the x-axis. Only used for Categorical dimensions. From 0cab51daadd56a28f05fec52a2c89098df1186ed Mon Sep 17 00:00:00 2001 From: holgern Date: Thu, 13 Feb 2020 13:40:39 +0100 Subject: [PATCH 063/265] Fix order and restore plot functions --- skopt/plots.py | 614 ++++++++++++++++++++++++++++++------------------- 1 file changed, 371 insertions(+), 243 deletions(-) diff --git a/skopt/plots.py b/skopt/plots.py index 0be00bfb7..55d487aa0 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -579,126 +579,218 @@ def _calc(x, y): return xi, yi, zi -def plot_evaluations(result, bins=20, dimension_names=None): +def plot_objective_2D(result, dimension_name1, dimension_name2, + n_points=40, n_samples=250, levels=10, zscale='linear'): """ - Visualize the order in which points were sampled during optimization. + Create and return a Matplotlib figure and axes with a landscape + contour-plot of the last fitted model of the search-space, + overlaid with all the samples from the optimization results, + for the two given dimensions of the search-space. - This creates a 2-d matrix plot where the diagonal plots are histograms - that show the distribution of samples for each search-space dimension. + This is similar to `plot_objective()` but only for 2 dimensions + whose doc-string also has a more extensive explanation. + + NOTE: Categorical dimensions are not supported. - The plots below the diagonal are scatter-plots of the samples for - all combinations of search-space dimensions. + Parameters + ---------- + result : `OptimizeResult` + The optimization results e.g. from calling `gp_minimize()`. - The ordering of the samples are shown as different colour-shades. + dimension_name1 : str + Name of a dimension in the search-space. - A red star shows the best found parameters. + dimension_name2 : str + Name of a dimension in the search-space. - NOTE: Search-spaces with `Categorical` dimensions are not supported. + n_samples : int, default=250 + Number of random samples used for estimating the contour-plot + of the objective function. - Parameters - ---------- - result : `OptimizeResult` - The optimization results from calling e.g. `gp_minimize()`. + n_points : int, default=40 + Number of points along each dimension where the partial dependence + is evaluated when generating the contour-plots. - bins : int, bins=20 - Number of bins to use for histograms on the diagonal. + levels : int, default=10 + Number of levels to draw on the contour plot. 
- dimension_names : list(str) - List of names for search-space dimensions to be used in the plot. - You can omit `Categorical` dimensions here as they are not supported. - If `None` then use all dimensions from the search-space. + zscale : str, default='linear' + Scale to use for the z axis of the contour plots. + Either 'log' or linear for all other choices. Returns ------- fig : `Matplotlib.Figure` - The object for the figure. - For example, call `fig.savefig('plot.png')` to save the plot. + The Matplotlib Figure-object. + For example, you can save the plot by calling `fig.savefig('file.png')` ax : `Matplotlib.Axes` - A 2-d matrix of Axes-objects with the sub-plots. + The Matplotlib Figure-object. + For example, you can save the plot by calling `fig.savefig('file.png')` """ # Get the search-space instance from the optimization results. space = result.space - # Get the relevant search-space dimensions. - if dimension_names is None: - # Get all dimensions. - dimensions = space.dimensions - else: - # Only get the named dimensions. - dimensions = space[dimension_names] + # Get the dimension-object, its index in the search-space, and its name. + dimension1 = space[dimension_name1] + dimension2 = space[dimension_name2] - # Ensure there are no categorical dimensions. + # Ensure dimensions are not Categorical. # TODO replace with check_list_types(dimensions, (Integer, Real)) in PR #597 - if any(isinstance(dim, Categorical) for dim in dimensions): + if any(isinstance(dim, Categorical) for dim in [dimension1, dimension2]): raise ValueError("Categorical dimension is not supported.") - # Number of search-space dimensions we are using. - n_dims = len(dimensions) + # Get the indices for the search-space dimensions. + index1 = dimension1.index + index2 = dimension2.index - # Create a figure for plotting a 2-d matrix of sub-plots. - fig, ax = plt.subplots(n_dims, n_dims, figsize=(2 * n_dims, 2 * n_dims)) + # Get the samples from the optimization-log for the relevant dimensions. + samples1 = get_samples_dimension(result=result, index=index1) + samples2 = get_samples_dimension(result=result, index=index2) - # Used to plot colour-shades for the sample-ordering. - # It is just a range from 0 to the number of samples. - sample_order = range(len(result.x_iters)) + # Get the best-found samples for the relevant dimensions. + best_sample1 = result.x[index1] + best_sample2 = result.x[index2] - # For all rows in the 2-d plot matrix. - for row in range(n_dims): - # Get the search-space dimension for this row. - dim_row = dimensions[row] + # Get the last fitted model for the search-space. + last_model = result.models[-1] - # Get the index for the search-space dimension. - # This is used to lookup that particular dimension in some functions. - index_row = dim_row.index + # Get new random samples from the search-space and transform if necessary. + new_samples = space.rvs(n_samples=n_samples) + new_samples = space.transform(new_samples) - # Get the samples from the optimization-log for this dimension. - samples_row = get_samples_dimension(result=result, index=index_row) + # Estimate the objective function for these sampled points + # using the last fitted model for the search-space. + xi, yi, zi = partial_dependence_2D(model=last_model, + dimension1=dimension1, + dimension2=dimension2, + samples=new_samples, + n_points=n_points) - # Get the best-found sample for this dimension. - best_sample_row = result.x[index_row] + # Start a new plot. 
+ fig, ax = plt.subplots(nrows=1, ncols=1) - # Search-space boundary for this dimension. - bounds_row = dim_row.bounds + # Scale for the z-axis of the contour-plot. Either Log or Linear (None). + locator = LogLocator() if zscale == 'log' else None - # Map the number of bins to a log-space if necessary. - bins_mapped = _map_bins(bins=bins, - bounds=dim_row.bounds, - prior=dim_row.prior) + # Plot the contour-landscape for the objective function. + ax.contourf(xi, yi, zi, levels, locator=locator, cmap='viridis_r') - # Plot a histogram on the diagonal. - ax[row, row].hist(samples_row, bins=bins_mapped, range=bounds_row) + # Plot all the parameters that were sampled during optimization. + # These are plotted as small black dots. + ax.scatter(samples1, samples2, c='black', s=10, linewidths=1) - # For all columns until the diagonal in the 2-d plot matrix. - for col in range(row): - # Get the search-space dimension for this column. - dim_col = dimensions[col] + # Plot the best parameters that were sampled during optimization. + # These are plotted as a big red star. + ax.scatter(best_sample1, best_sample2, + c='red', s=50, linewidths=1, marker='*') - # Get the index for this search-space dimension. - # This is used to lookup that dimension in some functions. - index_col = dim_col.index + # Use the dimension-names as the labels for the plot-axes. + ax.set_xlabel(dimension_name1) + ax.set_ylabel(dimension_name2) - # Get the samples from the optimization-log for that dimension. - samples_col = get_samples_dimension(result=result, index=index_col) + # Use log-scale on the x-axis? + if dimension1.prior == 'log-uniform': + ax.set_xscale('log') - # Plot all the parameters that were sampled during optimization. - # These are plotted as small coloured dots, where the colour-shade - # indicates the time-progression. - ax[row, col].scatter(samples_col, samples_row, - c=sample_order, s=40, lw=0., cmap='viridis') + # Use log-scale on the y-axis? + if dimension2.prior == 'log-uniform': + ax.set_yscale('log') - # Get the best-found sample for this dimension. - best_sample_col = result.x[index_col] + return fig, ax - # Plot the best parameters that were sampled during optimization. - # These are plotted as a big red star. - ax[row, col].scatter(best_sample_col, best_sample_row, - c='red', s=100, lw=0., marker='*') - # Make various adjustments to the plots. - _adjust_fig(fig=fig, ax=ax, space=space, - dimensions=dimensions, ylabel="Sample Count") +def plot_histogram(result, dimension_name, bins=20, rotate_labels=0): + """ + Create and return a Matplotlib figure with a histogram + of the samples from the optimization results, + for a given dimension of the search-space. + + Parameters + ---------- + result : `OptimizeResult` + The optimization results e.g. from calling `gp_minimize()`. + + dimension_name : str + Name of a dimension in the search-space. + + bins : int, bins=20 + Number of bins in the histogram. + + rotate_labels : int, rotate_labels=0 + Degree to rotate category-names on the x-axis. + Only used for Categorical dimensions. + + Returns + ------- + fig : `Matplotlib.Figure` + The Matplotlib Figure-object. + For example, you can save the plot by calling + `fig.savefig('file.png')` + + ax : `Matplotlib.Axes` + The Matplotlib Axes-object. + """ + + # Get the search-space instance from the optimization results. + space = result.space + + # Get the dimension-object. + dimension = space[dimension_name] + + # Get the samples from the optimization-log for that particular dimension. 
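+    # (One value per objective evaluation, in the order the points were
+    # tried, so the histogram shows where the optimizer spent its calls.)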
+ samples = get_samples_dimension(result=result, index=dimension.index) + + # Start a new plot. + fig, ax = plt.subplots(nrows=1, ncols=1) + + if isinstance(dimension, Categorical): + # When the search-space dimension is Categorical, it means + # that the possible values are strings. Matplotlib's histogram + # does not support this, so we have to make a bar-plot instead. + + # NOTE: This only shows the categories that are in the samples. + # So if a category was not sampled, it will not be shown here. + + # Count the number of occurrences of the string-categories. + counter = Counter(samples) + + # The counter returns a dict where the keys are the category-names + # and the values are the number of occurrences for each category. + names = list(counter.keys()) + counts = list(counter.values()) + + # Although Matplotlib's docs indicate that the bar() function + # can take a list of strings for the x-axis, it doesn't appear to work. + # So we hack it by creating a list of integers and setting the + # tick-labels with the category-names instead. + x = np.arange(len(counts)) + + # Plot using bars. + ax.bar(x, counts, tick_label=names) + + # Adjust the rotation of the category-names on the x-axis. + ax.set_xticklabels(labels=names, rotation=rotate_labels) + else: + # Otherwise the search-space Dimension is either integer or float, + # in which case the histogram can be plotted more easily. + + # Map the number of bins to a log-space if necessary. + bins_mapped = _map_bins(bins=bins, + bounds=dimension.bounds, + prior=dimension.prior) + + # Plot the histogram. + ax.hist(samples, bins=bins_mapped, range=dimension.bounds) + + # Use log-scale on the x-axis? + if dimension.prior == 'log-uniform': + ax.set_xscale('log') + + # Set the labels. + ax.set_xlabel(dimension_name) + ax.set_ylabel('Sample Count') return fig, ax @@ -716,7 +808,7 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, objective function, while the plots below the diagonal show the effect on the objective function when varying two dimensions. - The Partial Dependence is calculated by averaging the objective value + The Partial Dependence is calculated by averaging the objective value for a number of random samples in the search-space, while keeping one or two dimensions fixed at regular intervals. This averages out the effect of varying the other dimensions and shows @@ -821,7 +913,7 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, fig : `Matplotlib.Figure` The object for the figure. For example, call `fig.savefig('plot.png')` to save the plot. - + ax : `Matplotlib.Axes` A 2-d matrix of Axes-objects with the sub-plots. """ @@ -938,217 +1030,253 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, return fig, ax -def plot_objective_2D(result, dimension_name1, dimension_name2, - n_points=40, n_samples=250, levels=10, zscale='linear'): +def plot_evaluations(result, bins=20, dimension_names=None): """ - Create and return a Matplotlib figure and axes with a landscape - contour-plot of the last fitted model of the search-space, - overlaid with all the samples from the optimization results, - for the two given dimensions of the search-space. + Visualize the order in which points were sampled during optimization. - This is similar to `plot_objective()` but only for 2 dimensions - whose doc-string also has a more extensive explanation. - - NOTE: Categorical dimensions are not supported. 
+ This creates a 2-d matrix plot where the diagonal plots are histograms + that show the distribution of samples for each search-space dimension. - Parameters - ---------- - result : `OptimizeResult` - The optimization results e.g. from calling `gp_minimize()`. + The plots below the diagonal are scatter-plots of the samples for + all combinations of search-space dimensions. - dimension_name1 : str - Name of a dimension in the search-space. + The ordering of the samples are shown as different colour-shades. - dimension_name2 : str - Name of a dimension in the search-space. + A red star shows the best found parameters. - n_samples : int, default=250 - Number of random samples used for estimating the contour-plot - of the objective function. + NOTE: Search-spaces with `Categorical` dimensions are not supported. - n_points : int, default=40 - Number of points along each dimension where the partial dependence - is evaluated when generating the contour-plots. + Parameters + ---------- + result : `OptimizeResult` + The optimization results from calling e.g. `gp_minimize()`. - levels : int, default=10 - Number of levels to draw on the contour plot. + bins : int, bins=20 + Number of bins to use for histograms on the diagonal. - zscale : str, default='linear' - Scale to use for the z axis of the contour plots. - Either 'log' or linear for all other choices. + dimension_names : list(str) + List of names for search-space dimensions to be used in the plot. + You can omit `Categorical` dimensions here as they are not supported. + If `None` then use all dimensions from the search-space. Returns ------- fig : `Matplotlib.Figure` - The Matplotlib Figure-object. - For example, you can save the plot by calling `fig.savefig('file.png')` + The object for the figure. + For example, call `fig.savefig('plot.png')` to save the plot. ax : `Matplotlib.Axes` - The Matplotlib Figure-object. - For example, you can save the plot by calling `fig.savefig('file.png')` + A 2-d matrix of Axes-objects with the sub-plots. """ # Get the search-space instance from the optimization results. space = result.space - # Get the dimension-object, its index in the search-space, and its name. - dimension1 = space[dimension_name1] - dimension2 = space[dimension_name2] + # Get the relevant search-space dimensions. + if dimension_names is None: + # Get all dimensions. + dimensions = space.dimensions + else: + # Only get the named dimensions. + dimensions = space[dimension_names] - # Ensure dimensions are not Categorical. + # Ensure there are no categorical dimensions. # TODO replace with check_list_types(dimensions, (Integer, Real)) in PR #597 - if any(isinstance(dim, Categorical) for dim in [dimension1, dimension2]): + if any(isinstance(dim, Categorical) for dim in dimensions): raise ValueError("Categorical dimension is not supported.") - # Get the indices for the search-space dimensions. - index1 = dimension1.index - index2 = dimension2.index - - # Get the samples from the optimization-log for the relevant dimensions. - samples1 = get_samples_dimension(result=result, index=index1) - samples2 = get_samples_dimension(result=result, index=index2) - - # Get the best-found samples for the relevant dimensions. - best_sample1 = result.x[index1] - best_sample2 = result.x[index2] + # Number of search-space dimensions we are using. + n_dims = len(dimensions) - # Get the last fitted model for the search-space. - last_model = result.models[-1] + # Create a figure for plotting a 2-d matrix of sub-plots. 
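+    # (The figsize scales with the number of dimensions, keeping each
+    # panel at roughly a 2 x 2 inch footprint for any space size.)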
+ fig, ax = plt.subplots(n_dims, n_dims, figsize=(2 * n_dims, 2 * n_dims)) - # Get new random samples from the search-space and transform if necessary. - new_samples = space.rvs(n_samples=n_samples) - new_samples = space.transform(new_samples) + # Used to plot colour-shades for the sample-ordering. + # It is just a range from 0 to the number of samples. + sample_order = range(len(result.x_iters)) - # Estimate the objective function for these sampled points - # using the last fitted model for the search-space. - xi, yi, zi = partial_dependence_2D(model=last_model, - dimension1=dimension1, - dimension2=dimension2, - samples=new_samples, - n_points=n_points) + # For all rows in the 2-d plot matrix. + for row in range(n_dims): + # Get the search-space dimension for this row. + dim_row = dimensions[row] - # Start a new plot. - fig, ax = plt.subplots(nrows=1, ncols=1) + # Get the index for the search-space dimension. + # This is used to lookup that particular dimension in some functions. + index_row = dim_row.index - # Scale for the z-axis of the contour-plot. Either Log or Linear (None). - locator = LogLocator() if zscale == 'log' else None + # Get the samples from the optimization-log for this dimension. + samples_row = get_samples_dimension(result=result, index=index_row) - # Plot the contour-landscape for the objective function. - ax.contourf(xi, yi, zi, levels, locator=locator, cmap='viridis_r') + # Get the best-found sample for this dimension. + best_sample_row = result.x[index_row] - # Plot all the parameters that were sampled during optimization. - # These are plotted as small black dots. - ax.scatter(samples1, samples2, c='black', s=10, linewidths=1) + # Search-space boundary for this dimension. + bounds_row = dim_row.bounds - # Plot the best parameters that were sampled during optimization. - # These are plotted as a big red star. - ax.scatter(best_sample1, best_sample2, - c='red', s=50, linewidths=1, marker='*') + # Map the number of bins to a log-space if necessary. + bins_mapped = _map_bins(bins=bins, + bounds=dim_row.bounds, + prior=dim_row.prior) - # Use the dimension-names as the labels for the plot-axes. - ax.set_xlabel(dimension_name1) - ax.set_ylabel(dimension_name2) + # Plot a histogram on the diagonal. + ax[row, row].hist(samples_row, bins=bins_mapped, range=bounds_row) - # Use log-scale on the x-axis? - if dimension1.prior == 'log-uniform': - ax.set_xscale('log') + # For all columns until the diagonal in the 2-d plot matrix. + for col in range(row): + # Get the search-space dimension for this column. + dim_col = dimensions[col] - # Use log-scale on the y-axis? - if dimension2.prior == 'log-uniform': - ax.set_yscale('log') + # Get the index for this search-space dimension. + # This is used to lookup that dimension in some functions. + index_col = dim_col.index - return fig, ax + # Get the samples from the optimization-log for that dimension. + samples_col = get_samples_dimension(result=result, index=index_col) + # Plot all the parameters that were sampled during optimization. + # These are plotted as small coloured dots, where the colour-shade + # indicates the time-progression. + ax[row, col].scatter(samples_col, samples_row, + c=sample_order, s=40, lw=0., cmap='viridis') -def plot_histogram(result, dimension_name, bins=20, rotate_labels=0): - """ - Create and return a Matplotlib figure with a histogram - of the samples from the optimization results, - for a given dimension of the search-space. + # Get the best-found sample for this dimension. 
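+            # (That is, this column-dimension's value inside the best
+            # parameter vector `result.x`.)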
+ best_sample_col = result.x[index_col] - Parameters - ---------- - result : `OptimizeResult` - The optimization results e.g. from calling `gp_minimize()`. + # Plot the best parameters that were sampled during optimization. + # These are plotted as a big red star. + ax[row, col].scatter(best_sample_col, best_sample_row, + c='red', s=100, lw=0., marker='*') - dimension_name : str - Name of a dimension in the search-space. + # Make various adjustments to the plots. + _adjust_fig(fig=fig, ax=ax, space=space, + dimensions=dimensions, ylabel="Sample Count") - bins : int, bins=20 - Number of bins in the histogram. + return fig, ax - rotate_labels : int, rotate_labels=0 - Degree to rotate category-names on the x-axis. - Only used for Categorical dimensions. +def _map_categories(space, points, minimum): + """ + Map categorical values to integers in a set of points. Returns ------- - fig : `Matplotlib.Figure` - The Matplotlib Figure-object. - For example, you can save the plot by calling - `fig.savefig('file.png')` - - ax : `Matplotlib.Axes` - The Matplotlib Axes-object. + mapped_points : np.array, shape=points.shape + A copy of `points` with categoricals replaced with their indices in + the corresponding `Dimension`. + mapped_minimum : np.array, shape (space.n_dims,) + A copy of `minimum` with categoricals replaced with their indices in + the corresponding `Dimension`. + iscat : np.array, shape (space.n_dims,) + Boolean array indicating whether dimension `i` in the `space` is + categorical. """ + points = np.asarray(points, dtype=object) # Allow slicing, preserve cats + iscat = np.repeat(False, space.n_dims) + min_ = np.zeros(space.n_dims) + pts_ = np.zeros(points.shape) + for i, dim in enumerate(space.dimensions): + if isinstance(dim, Categorical): + iscat[i] = True + catmap = dict(zip(dim.categories, count())) + pts_[:, i] = [catmap[cat] for cat in points[:, i]] + min_[i] = catmap[minimum[i]] + else: + pts_[:, i] = points[:, i] + min_[i] = minimum[i] + return pts_, min_, iscat - # Get the search-space instance from the optimization results. - space = result.space - - # Get the dimension-object. - dimension = space[dimension_name] - - # Get the samples from the optimization-log for that particular dimension. - samples = get_samples_dimension(result=result, index=dimension.index) - - # Start a new plot. - fig, ax = plt.subplots(nrows=1, ncols=1) - - if isinstance(dimension, Categorical): - # When the search-space dimension is Categorical, it means - # that the possible values are strings. Matplotlib's histogram - # does not support this, so we have to make a bar-plot instead. - - # NOTE: This only shows the categories that are in the samples. - # So if a category was not sampled, it will not be shown here. - - # Count the number of occurrences of the string-categories. - counter = Counter(samples) - - # The counter returns a dict where the keys are the category-names - # and the values are the number of occurrences for each category. - names = list(counter.keys()) - counts = list(counter.values()) - - # Although Matplotlib's docs indicate that the bar() function - # can take a list of strings for the x-axis, it doesn't appear to work. - # So we hack it by creating a list of integers and setting the - # tick-labels with the category-names instead. - x = np.arange(len(counts)) - - # Plot using bars. - ax.bar(x, counts, tick_label=names) - # Adjust the rotation of the category-names on the x-axis. 
-        ax.set_xticklabels(labels=names, rotation=rotate_labels)
+def _evenly_sample(dim, n_points):
+    """Return `n_points` evenly spaced points from a Dimension.
+    Parameters
+    ----------
+    dim : `Dimension`
+        The Dimension to sample from.  Can be categorical; evenly-spaced
+        category indices are chosen in order without replacement (result
+        may be smaller than `n_points`).
+    n_points : int
+        The number of points to sample from `dim`.
+    Returns
+    -------
+    xi : np.array
+        The sampled points in the Dimension.  For Categorical
+        dimensions, returns the index of the value in
+        `dim.categories`.
+    xi_transformed : np.array
+        The transformed values of `xi`, for feeding to a model.
+    """
+    cats = np.array(getattr(dim, 'categories', []), dtype=object)
+    if len(cats):  # Sample categoricals while maintaining order
+        xi = np.linspace(0, len(cats) - 1, min(len(cats), n_points),
+                         dtype=int)
+        xi_transformed = dim.transform(cats[xi])
     else:
-        # Otherwise the search-space Dimension is either integer or float,
-        # in which case the histogram can be plotted more easily.
+        bounds = dim.bounds
+        # XXX use linspace(*bounds, n_points) after python2 support ends
+        xi = np.linspace(bounds[0], bounds[1], n_points)
+        xi_transformed = dim.transform(xi)
+    return xi, xi_transformed
 
-        # Map the number of bins to a log-space if necessary.
-        bins_mapped = _map_bins(bins=bins,
-                                bounds=dimension.bounds,
-                                prior=dimension.prior)
 
-        # Plot the histogram.
-        ax.hist(samples, bins=bins_mapped, range=dimension.bounds)
+def _cat_format(dimension, x, _):
+    """Categorical axis tick formatter function.  Returns the name of category
+    `x` in `dimension`.  Used with `matplotlib.ticker.FuncFormatter`."""
+    return str(dimension.categories[int(x)])
 
-    # Use log-scale on the x-axis?
-    if dimension.prior == 'log-uniform':
-        ax.set_xscale('log')
 
-    # Set the labels.
-    ax.set_xlabel(dimension_name)
-    ax.set_ylabel('Sample Count')
-
-    return fig, ax
+def _evaluate_min_params(result, params='result',
+                         n_minimum_search=None,
+                         random_state=None):
+    """Returns the minimum based on `params`"""
+    x_vals = None
+    space = result.space
+    if isinstance(params, str):
+        if params == 'result':
+            # Using the best observed result
+            x_vals = result.x
+        elif params == 'expected_minimum':
+            if result.space.is_partly_categorical:
+                # space is also categorical
+                raise ValueError('expected_minimum does not support any '
+                                 'categorical values')
+            # Do a gradient based minimum search using scipy's own minimizer
+            if n_minimum_search:
+                # If a value for
+                # expected_minimum_samples has been parsed
+                x_vals, _ = expected_minimum(
+                    result,
+                    n_random_starts=n_minimum_search,
+                    random_state=random_state)
+            else:  # Use standard of 20 random starting points
+                x_vals, _ = expected_minimum(result,
+                                             n_random_starts=20,
+                                             random_state=random_state)
+        elif params == 'expected_minimum_random':
+            # Do a minimum search by evaluating the function with
+            # n_samples sample values
+            if n_minimum_search:
+                # If a value for
+                # n_minimum_samples has been parsed
+                x_vals, _ = expected_minimum_random_sampling(
+                    result,
+                    n_random_starts=n_minimum_search,
+                    random_state=random_state)
+            else:
+                # Use standard of 10^n_parameters.
Note this
+                # becomes very slow for many parameters
+                x_vals, _ = expected_minimum_random_sampling(
+                    result,
+                    n_random_starts=10 ** len(result.x),
+                    random_state=random_state)
+        else:
+            raise ValueError('Argument `eval_min_params` must be a valid '
+                             'string (`result`)')
+    elif isinstance(params, list):
+        assert len(params) == len(result.x), 'Argument ' \
+            '`eval_min_params` of type list must have same length as ' \
+            'number of features'
+        # Using defined x_values
+        x_vals = params
+    else:
+        raise ValueError('Argument `eval_min_params` must '
+                         'be a string or a list')
+    return x_vals

From 96f9b172d0f28d0845267abea81cfbf3871acdfc Mon Sep 17 00:00:00 2001
From: holgern
Date: Thu, 13 Feb 2020 16:01:04 +0100
Subject: [PATCH 064/265] Add categories back to plot_evaluations

---
 skopt/plots.py | 62 ++++++++++++++++++++++++++++++--------------
 skopt/space/space.py | 10 ++++++-
 2 files changed, 51 insertions(+), 21 deletions(-)

diff --git a/skopt/plots.py b/skopt/plots.py
index 55d487aa0..ffc2584e3 100644
--- a/skopt/plots.py
+++ b/skopt/plots.py
@@ -360,7 +360,7 @@ def _adjust_fig(fig, ax, space, ylabel, dimensions):
             ax[row, col].set_xticklabels([])
 
 
-def _map_bins(bins, bounds, prior):
+def _map_bins(bins, bounds, prior, categories=None):
     """
     For use when plotting histograms.
     Maps the number of bins to a log-scale between the bounds, if necessary.
@@ -386,8 +386,9 @@ def _map_bins(bins, bounds, prior):
         Number of bins for a histogram if no mapping,
         or a log-scaled array of bin-points if mapping is needed.
     """
-
-    if prior == 'log-uniform':
+    if categories is not None:
+        bins_ = len(categories)
+    elif prior == 'log-uniform':
         # Map the number of bins to a log-space for the dimension bounds.
         bounds_log = np.log10(bounds)
         bins_mapped = np.logspace(bounds_log[0], bounds_log[1], bins)
@@ -1044,8 +1045,6 @@ def plot_evaluations(result, bins=20, dimension_names=None):
 
     A red star shows the best found parameters.
 
-    NOTE: Search-spaces with `Categorical` dimensions are not supported.
-
     Parameters
     ----------
     result : `OptimizeResult`
@@ -1054,9 +1053,8 @@ def plot_evaluations(result, bins=20, dimension_names=None):
     bins : int, default=20
         Number of bins to use for histograms on the diagonal.
 
-    dimension_names : list(str)
+    dimension_names : list of str, default=None
         List of names for search-space dimensions to be used in the plot.
-        You can omit `Categorical` dimensions here as they are not supported.
         If `None` then use all dimensions from the search-space.
 
     Returns
@@ -1071,19 +1069,31 @@ def plot_evaluations(result, bins=20, dimension_names=None):
 
     # Get the search-space instance from the optimization results.
     space = result.space
-
+    # Convert categoricals to integers, so we can ensure consistent ordering.
+    # Assign indices to categories in the order they appear in the Dimension.
+    # Matplotlib's categorical plotting functions are only present in v 2.1+,
+    # and may order categoricals differently in different plots anyway.
+    samples, minimum, iscat = _map_categories(space, result.x_iters, result.x)
     # Get the relevant search-space dimensions.
     if dimension_names is None:
         # Get all dimensions.
         dimensions = space.dimensions
+        dim_index = []
+        for row in range(space.n_dims):
+            dim_index.append(row)
     else:
         # Only get the named dimensions.
- dimensions = space[dimension_names] + dimensions = [] + dim_index = [] + for row in range(space.n_dims): + dim_name = space.dimensions[row].name + if dim_name is None: + dim_name = "X_%d" % row + if dim_name in dimension_names: + dimensions.append(space.dimensions[row]) + dim_index.append(row) - # Ensure there are no categorical dimensions. - # TODO replace with check_list_types(dimensions, (Integer, Real)) in PR #597 - if any(isinstance(dim, Categorical) for dim in dimensions): - raise ValueError("Categorical dimension is not supported.") # Number of search-space dimensions we are using. n_dims = len(dimensions) @@ -1102,7 +1112,7 @@ def plot_evaluations(result, bins=20, dimension_names=None): # Get the index for the search-space dimension. # This is used to lookup that particular dimension in some functions. - index_row = dim_row.index + index_row = dim_index[row] # Get the samples from the optimization-log for this dimension. samples_row = get_samples_dimension(result=result, index=index_row) @@ -1110,13 +1120,19 @@ def plot_evaluations(result, bins=20, dimension_names=None): # Get the best-found sample for this dimension. best_sample_row = result.x[index_row] - # Search-space boundary for this dimension. - bounds_row = dim_row.bounds + if iscat[row]: + categories = dim_row.categories + bounds_row = None + else: + categories = None + # Search-space boundary for this dimension. + bounds_row = dim_row.bounds # Map the number of bins to a log-space if necessary. bins_mapped = _map_bins(bins=bins, bounds=dim_row.bounds, - prior=dim_row.prior) + prior=dim_row.prior, + categories=categories) # Plot a histogram on the diagonal. ax[row, row].hist(samples_row, bins=bins_mapped, range=bounds_row) @@ -1128,8 +1144,7 @@ def plot_evaluations(result, bins=20, dimension_names=None): # Get the index for this search-space dimension. # This is used to lookup that dimension in some functions. - index_col = dim_col.index - + index_col = dim_index[col] # Get the samples from the optimization-log for that dimension. samples_col = get_samples_dimension(result=result, index=index_col) @@ -1157,14 +1172,17 @@ def plot_evaluations(result, bins=20, dimension_names=None): def _map_categories(space, points, minimum): """ Map categorical values to integers in a set of points. + Returns ------- - mapped_points : np.array, shape=points.shape + mapped_points : np.array, shape=points.shape A copy of `points` with categoricals replaced with their indices in the corresponding `Dimension`. + mapped_minimum : np.array, shape (space.n_dims,) A copy of `minimum` with categoricals replaced with their indices in the corresponding `Dimension`. + iscat : np.array, shape (space.n_dims,) Boolean array indicating whether dimension `i` in the `space` is categorical. @@ -1187,20 +1205,24 @@ def _map_categories(space, points, minimum): def _evenly_sample(dim, n_points): """Return `n_points` evenly spaced points from a Dimension. + Parameters ---------- dim : `Dimension` The Dimension to sample from. Can be categorical; evenly-spaced category indices are chosen in order without replacement (result may be smaller than `n_points`). + n_points : int The number of points to sample from `dim`. + Returns ------- xi : np.array The sampled points in the Dimension. For Categorical dimensions, returns the index of the value in `dim.categories`. + xi_transformed : np.array The transformed values of `xi`, for feeding to a model. 
""" diff --git a/skopt/space/space.py b/skopt/space/space.py index ab89d2ef5..32434dd27 100644 --- a/skopt/space/space.py +++ b/skopt/space/space.py @@ -670,7 +670,15 @@ def dimension_names(self): """ Names of all the dimensions in the search-space. """ - return [dim.name for dim in self.dimensions] + index = 0 + names = [] + for dim in self.dimensions: + if dim.name is None: + names.append("X_%d" % index) + else: + names.append(dim.name) + index += 1 + return names @property def is_real(self): From ef069b15e563f2833ffebfd0e350a12ea0d18328 Mon Sep 17 00:00:00 2001 From: holgern Date: Fri, 14 Feb 2020 14:51:33 +0100 Subject: [PATCH 065/265] Add plot_gaussian_process plot function * Let examples use plot_gaussian_process --- doc/modules/classes.rst | 1 + examples/bayesian-optimization.py | 128 ++++---------- examples/exploration-vs-exploitation.py | 78 +++------ ...optimizer-with-different-base-estimator.py | 88 +++------- skopt/optimizer/optimizer.py | 27 +-- skopt/plots.py | 163 ++++++++++++++++++ 6 files changed, 264 insertions(+), 221 deletions(-) diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index a411f2703..71df2f9a5 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -190,6 +190,7 @@ details. plots.partial_dependence plots.plot_convergence plots.plot_evaluations + plots.plot_gaussian_process plots.plot_objective plots.plot_regret diff --git a/examples/bayesian-optimization.py b/examples/bayesian-optimization.py index 4091b3137..c04ada91c 100644 --- a/examples/bayesian-optimization.py +++ b/examples/bayesian-optimization.py @@ -19,13 +19,15 @@ under the constraints that - :math:`f` is a black box for which no closed form is known - (nor its gradients); + (nor its gradients); - :math:`f` is expensive to evaluate; - and evaluations of :math:`y = f(x)` may be noisy. **Disclaimer.** If you do not have these constraints, then there is certainly a better optimization algorithm than Bayesian optimization. +This example uses :class:`plots.plot_gaussian_process` which is available +since version 0.7.3. Bayesian optimization loop -------------------------- @@ -33,15 +35,12 @@ For :math:`t=1:T`: 1. Given observations :math:`(x_i, y_i=f(x_i))` for :math:`i=1:t`, build a - probabilistic model for the objective :math:`f`. Integrate out all - possible true functions, using Gaussian process regression. + probabilistic model for the objective :math:`f`. Integrate out all + possible true functions, using Gaussian process regression. 2. optimize a cheap acquisition/utility function $u$ based on the posterior - distribution for sampling the next point. - - .. math:: - x_{t+1} = arg \min_x u(x) - + distribution for sampling the next point. + .. math::`x_{t+1} = arg \min_x u(x)` Exploit uncertainty to balance exploration against exploitation. 3. Sample the next observation :math:`y_{t+1}` at :math:`x_{t+1}`. @@ -54,10 +53,8 @@ tried next: - Expected improvement (default): - :math:`-EI(x) = -\mathbb{E} [f(x) - f(x_t^+)]` - + :math:`-EI(x) = -\mathbb{E} [f(x) - f(x_t^+)]` - Lower confidence bound: :math:`LCB(x) = \mu_{GP}(x) + \kappa \sigma_{GP}(x)` - - Probability of improvement: :math:`-PI(x) = -P(f(x) \geq f(x_t^+) + \kappa)` where :math:`x_t^+` is the best point observed so far. @@ -129,7 +126,7 @@ def f(x, noise_level=noise_level): # - `fun` [float]: function value at the minimum. # - `models`: surrogate models used for each iteration. # - `x_iters` [array]: -# location of function evaluation for each iteration. +# location of function evaluation for each iteration. 
# - `func_vals` [array]: function value for each iteration. # - `space` [Space]: the optimization space. # - `specs` [dict]: parameters passed to the function. @@ -150,73 +147,38 @@ def f(x, noise_level=noise_level): # 1. The approximation of the fit gp model to the original function. # 2. The acquisition values that determine the next point to be queried. -from skopt.acquisition import gaussian_ei - plt.rcParams["figure.figsize"] = (8, 14) - -x = np.linspace(-2, 2, 400).reshape(-1, 1) -x_gp = res.space.transform(x.tolist()) -fx = np.array([f(x_i, noise_level=0.0) for x_i in x]) +def f_wo_noise(x): + return f(x, noise_level=0) ############################################################################# # Plot the 5 iterations following the 5 random points - +from skopt.plots import plot_gaussian_process for n_iter in range(5): - gp = res.models[n_iter] - curr_x_iters = res.x_iters[:5+n_iter] - curr_func_vals = res.func_vals[:5+n_iter] - # Plot true function. plt.subplot(5, 2, 2*n_iter+1) - plt.plot(x, fx, "r--", label="True (unknown)") - plt.fill(np.concatenate([x, x[::-1]]), - np.concatenate([fx - 1.9600 * noise_level, - fx[::-1] + 1.9600 * noise_level]), - alpha=.2, fc="r", ec="None") - - # Plot GP(x) + contours - y_pred, sigma = gp.predict(x_gp, return_std=True) - plt.plot(x, y_pred, "g--", label=r"$\mu_{GP}(x)$") - plt.fill(np.concatenate([x, x[::-1]]), - np.concatenate([y_pred - 1.9600 * sigma, - (y_pred + 1.9600 * sigma)[::-1]]), - alpha=.2, fc="g", ec="None") - - # Plot sampled points - plt.plot(curr_x_iters, curr_func_vals, - "r.", markersize=8, label="Observations") - - # Adjust plot layout - plt.grid() if n_iter == 0: - plt.legend(loc="best", prop={'size': 6}, numpoints=1) - - if n_iter != 4: - plt.tick_params(axis='x', which='both', bottom='off', - top='off', labelbottom='off') - + show_legend = True + else: + show_legend = False + + ax = plot_gaussian_process(res, n_calls=n_iter, + objective=f_wo_noise, + noise_level=noise_level, + show_legend=show_legend, show_title=False, + show_next_point=False, show_acq_func=False) + ax.set_ylabel("") + ax.set_xlabel("") # Plot EI(x) plt.subplot(5, 2, 2*n_iter+2) - acq = gaussian_ei(x_gp, gp, y_opt=np.min(curr_func_vals)) - plt.plot(x, acq, "b", label="EI(x)") - plt.fill_between(x.ravel(), -2.0, acq.ravel(), alpha=0.3, color='blue') - - next_x = res.x_iters[5+n_iter] - next_acq = gaussian_ei(res.space.transform([next_x]), gp, - y_opt=np.min(curr_func_vals)) - plt.plot(next_x, next_acq, "bo", markersize=6, label="Next query point") - - # Adjust plot layout - plt.ylim(0, 0.1) - plt.grid() - - if n_iter == 0: - plt.legend(loc="best", prop={'size': 6}, numpoints=1) - - if n_iter != 4: - plt.tick_params(axis='x', which='both', bottom='off', - top='off', labelbottom='off') + ax = plot_gaussian_process(res, n_calls=n_iter, + show_legend=show_legend, show_title=False, + show_mu=False, show_acq_func=True, + show_observations=False, + show_next_point=True) + ax.set_ylabel("") + ax.set_xlabel("") plt.show() @@ -242,33 +204,7 @@ def f(x, noise_level=noise_level): plt.rcParams["figure.figsize"] = (6, 4) # Plot f(x) + contours -x = np.linspace(-2, 2, 400).reshape(-1, 1) -x_gp = res.space.transform(x.tolist()) - -fx = [f(x_i, noise_level=0.0) for x_i in x] -plt.plot(x, fx, "r--", label="True (unknown)") -plt.fill(np.concatenate([x, x[::-1]]), - np.concatenate(([fx_i - 1.9600 * noise_level for fx_i in fx], - [fx_i + 1.9600 * noise_level for fx_i in fx[::-1]])), - alpha=.2, fc="r", ec="None") - -# Plot GP(x) + contours -gp = res.models[-1] -y_pred, sigma = 
gp.predict(x_gp, return_std=True)
-
-plt.plot(x, y_pred, "g--", label=r"$\mu_{GP}(x)$")
-plt.fill(np.concatenate([x, x[::-1]]),
-         np.concatenate([y_pred - 1.9600 * sigma,
-                         (y_pred + 1.9600 * sigma)[::-1]]),
-         alpha=.2, fc="g", ec="None")
-
-# Plot sampled points
-plt.plot(res.x_iters,
-         res.func_vals,
-         "r.", markersize=15, label="Observations")
-
-plt.title(r"$x^* = %.4f, f(x^*) = %.4f$" % (res.x[0], res.fun))
-plt.legend(loc="best", prop={'size': 8}, numpoints=1)
-plt.grid()
+_ = plot_gaussian_process(res, objective=f_wo_noise,
+                          noise_level=noise_level)
 
 plt.show()
diff --git a/examples/exploration-vs-exploitation.py b/examples/exploration-vs-exploitation.py
index a311cd335..78fc740e1 100644
--- a/examples/exploration-vs-exploitation.py
+++ b/examples/exploration-vs-exploitation.py
@@ -23,10 +23,12 @@
 "acq_func_kwargs". This is a dict of extra arguments for the acquisition
 function.
 
-If you want opt.ask() to give a new acquisition value imdediatly after
+If you want opt.ask() to give a new acquisition value immediately after
 tweaking kappa or xi call opt.update_next(). This ensures that the next
 value is updated with the new acquisition parameters.
+
+This example uses :class:`plots.plot_gaussian_process` which is available
+since version 0.7.3.
 """
 print(__doc__)
 
@@ -40,11 +42,12 @@
 # -----------
 # First we define our objective like in the ask-and-tell example notebook and
 # define a plotting function. We do however only use one initial random point.
-# All points afterthe first one is therefore choosen by the acquisition
+# All points after the first one are therefore chosen by the acquisition
 # function.
 
 from skopt.learning import ExtraTreesRegressor
 from skopt import Optimizer
+from skopt.plots import plot_gaussian_process
 
 noise_level = 0.1
 
 
 def objective(x, noise_level=noise_level):
     return np.sin(5 * x[0]) * (1 - np.tanh(x[0] ** 2)) +\
         np.random.randn() * noise_level
 
+def objective_wo_noise(x):
+    return objective(x, noise_level=0)
 
#############################################################################
 opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=1,
                 acq_optimizer="sampling")
 
#############################################################################
+# Plotting parameters
 
-x = np.linspace(-2, 2, 400).reshape(-1, 1)
-fx = np.array([objective(x_i, noise_level=0.0) for x_i in x])
-
-#############################################################################
-
-from skopt.acquisition import gaussian_ei
-def plot_optimizer(opt, x, fx):
-    model = opt.models[-1]
-    x_model = opt.space.transform(x.tolist())
-
-    # Plot true function.
-    plt.plot(x, fx, "r--", label="True (unknown)")
-    plt.fill(np.concatenate([x, x[::-1]]),
-             np.concatenate([fx - 1.9600 * noise_level,
-                             fx[::-1] + 1.9600 * noise_level]),
-             alpha=.2, fc="r", ec="None")
-
-    # Plot Model(x) + contours
-    y_pred, sigma = model.predict(x_model, return_std=True)
-    plt.plot(x, y_pred, "g--", label=r"$\mu(x)$")
-    plt.fill(np.concatenate([x, x[::-1]]),
-             np.concatenate([y_pred - 1.9600 * sigma,
-                             (y_pred + 1.9600 * sigma)[::-1]]),
-             alpha=.2, fc="g", ec="None")
-
-    # Plot sampled points
-    plt.plot(opt.Xi, opt.yi,
-             "r.", markersize=8, label="Observations")
-
-    acq = gaussian_ei(x_model, model, y_opt=np.min(opt.yi))
-    # shift down to make a better plot
-    acq = 4 * acq - 2
-    plt.plot(x, acq, "b", label="EI(x)")
-    plt.fill_between(x.ravel(), -2.0, acq.ravel(), alpha=0.3, color='blue')
-
-    # Adjust plot layout
-    plt.grid()
-    plt.legend(loc='best')
+plot_args = {"objective": objective_wo_noise,
+             "noise_level": noise_level, "show_legend": True,
+             "show_title": True, "show_next_point": False,
+             "show_acq_func": True}
 
#############################################################################
# We run an optimization loop with standard settings
 
 for i in range(30):
     next_x = opt.ask()
     f_val = objective(next_x)
     opt.tell(next_x, f_val)
 # The same output could be created with opt.run(objective, n_iter=30)
-plot_optimizer(opt, x, fx)
+_ = plot_gaussian_process(opt.get_result(), **plot_args)
 
#############################################################################
# We see that a minimum is found and "exploited"
@@ -123,55 +95,51 @@ def plot_optimizer(opt, x, fx):
                 acq_func_kwargs=acq_func_kwargs)
#############################################################################
 opt.run(objective, n_iter=20)
-plot_optimizer(opt, x, fx)
+_ = plot_gaussian_process(opt.get_result(), **plot_args)
 
#############################################################################
# We see that the points are more random now.
#
# This works both for kappa when using acq_func="LCB":
-#############################################################################
 opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=1,
                 acq_func="LCB", acq_optimizer="sampling",
                 acq_func_kwargs=acq_func_kwargs)
#############################################################################
 opt.run(objective, n_iter=20)
-plot_optimizer(opt, x, fx)
+_ = plot_gaussian_process(opt.get_result(), **plot_args)
 
#############################################################################
# And for xi when using acq_func="EI": or acq_func="PI":
-#############################################################################
-
 opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=1,
                 acq_func="PI", acq_optimizer="sampling",
                 acq_func_kwargs=acq_func_kwargs)
#############################################################################
 opt.run(objective, n_iter=20)
-plot_optimizer(opt, x, fx)
+_ = plot_gaussian_process(opt.get_result(), **plot_args)
 
#############################################################################
# We can also favor exploitation:
 acq_func_kwargs = {"xi": 0.000001, "kappa": 0.001}
-
#############################################################################
 opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=1,
                 acq_func="LCB", acq_optimizer="sampling",
                 acq_func_kwargs=acq_func_kwargs)
#############################################################################
 opt.run(objective, n_iter=20)
-plot_optimizer(opt, x, fx)
+_ = plot_gaussian_process(opt.get_result(), **plot_args)
#############################################################################
 opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=1,
                 acq_func="EI", acq_optimizer="sampling",
                 acq_func_kwargs=acq_func_kwargs)
#############################################################################
 opt.run(objective, n_iter=20)
-plot_optimizer(opt, x, fx)
+_ = plot_gaussian_process(opt.get_result(), **plot_args)
#############################################################################
 opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=1,
                 acq_func="PI", acq_optimizer="sampling",
                 acq_func_kwargs=acq_func_kwargs)
#############################################################################
 opt.run(objective, n_iter=20)
-plot_optimizer(opt, x, fx)
+_ = plot_gaussian_process(opt.get_result(), **plot_args)
 
#############################################################################
# Note that negative values do not work with the "PI"-acquisition function
# but work with "EI":
 acq_func_kwargs = {"xi": -1000000000000}
-
#############################################################################
 opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=1,
                 acq_func="PI", acq_optimizer="sampling",
                 acq_func_kwargs=acq_func_kwargs)
#############################################################################
 opt.run(objective, n_iter=20)
-plot_optimizer(opt, x, fx) +_ = plot_gaussian_process(opt.get_result(), **plot_args) ############################################################################# acq_func_kwargs = {"kappa": 100000} ############################################################################# @@ -217,4 +185,4 @@ def plot_optimizer(opt, x, fx): opt.update_next() ############################################################################# opt.run(objective, n_iter=20) -plot_optimizer(opt, x, fx) +_ = plot_gaussian_process(opt.get_result(), **plot_args) diff --git a/examples/optimizer-with-different-base-estimator.py b/examples/optimizer-with-different-base-estimator.py index fc771a619..e51d21794 100644 --- a/examples/optimizer-with-different-base-estimator.py +++ b/examples/optimizer-with-different-base-estimator.py @@ -12,6 +12,8 @@ To use different base_estimator or create a regressor with different parameters, we can create a regressor object and set it as kernel. +This example uses :class:`plots.plot_gaussian_process` which is available +since version 0.7.3. """ print(__doc__) @@ -34,6 +36,8 @@ def objective(x, noise_level=noise_level): return np.sin(5 * x[0]) * (1 - np.tanh(x[0] ** 2))\ + np.random.randn() * noise_level +def objective_wo_noise(x): + return objective(x, noise_level=0) ############################################################################# @@ -43,72 +47,37 @@ def objective(x, noise_level=noise_level): ############################################################################# -x = np.linspace(-2, 2, 400).reshape(-1, 1) -fx = np.array([objective(x_i, noise_level=0.0) for x_i in x]) +from skopt.plots import plot_gaussian_process -############################################################################# - -from skopt.acquisition import gaussian_ei - -def plot_optimizer(res, next_x, x, fx, n_iter, max_iters=5): - x_gp = res.space.transform(x.tolist()) - gp = res.models[-1] - curr_x_iters = res.x_iters - curr_func_vals = res.func_vals - - # Plot true function. 
+def plot_optimizer(res, next_x, n_iter, max_iters=5): + if n_iter == 0: + show_legend = True + else: + show_legend = False ax = plt.subplot(max_iters, 2, 2 * n_iter + 1) - plt.plot(x, fx, "r--", label="True (unknown)") - plt.fill(np.concatenate([x, x[::-1]]), - np.concatenate([fx - 1.9600 * noise_level, - fx[::-1] + 1.9600 * noise_level]), - alpha=.2, fc="r", ec="None") + # Plot GP(x) + contours + ax = plot_gaussian_process(res, ax=ax, + objective=objective_wo_noise, + noise_level=noise_level, + show_legend=show_legend, show_title=True, + show_next_point=False, show_acq_func=False) + ax.set_ylabel("") + ax.set_xlabel("") if n_iter < max_iters - 1: ax.get_xaxis().set_ticklabels([]) - # Plot GP(x) + contours - y_pred, sigma = gp.predict(x_gp, return_std=True) - plt.plot(x, y_pred, "g--", label=r"$\mu_{GP}(x)$") - plt.fill(np.concatenate([x, x[::-1]]), - np.concatenate([y_pred - 1.9600 * sigma, - (y_pred + 1.9600 * sigma)[::-1]]), - alpha=.2, fc="g", ec="None") - - # Plot sampled points - plt.plot(curr_x_iters, curr_func_vals, - "r.", markersize=8, label="Observations") - plt.title(r"x* = %.4f, f(x*) = %.4f" % (res.x[0], res.fun)) - # Adjust plot layout - plt.grid() - - if n_iter == 0: - plt.legend(loc="best", prop={'size': 6}, numpoints=1) - - if n_iter != 4: - plt.tick_params(axis='x', which='both', bottom='off', - top='off', labelbottom='off') - # Plot EI(x) ax = plt.subplot(max_iters, 2, 2 * n_iter + 2) - acq = gaussian_ei(x_gp, gp, y_opt=np.min(curr_func_vals)) - plt.plot(x, acq, "b", label="EI(x)") - plt.fill_between(x.ravel(), -2.0, acq.ravel(), alpha=0.3, color='blue') - + ax = plot_gaussian_process(res, ax=ax, + noise_level=noise_level, + show_legend=show_legend, show_title=False, + show_next_point=True, show_acq_func=True, + next_x=next_x, show_observations=False, + show_mu=False) + ax.set_ylabel("") + ax.set_xlabel("") if n_iter < max_iters - 1: ax.get_xaxis().set_ticklabels([]) - next_acq = gaussian_ei(res.space.transform([next_x]), gp, - y_opt=np.min(curr_func_vals)) - plt.plot(next_x, next_acq, "bo", markersize=6, label="Next query point") - - # Adjust plot layout - plt.ylim(0, 0.07) - plt.grid() - if n_iter == 0: - plt.legend(loc="best", prop={'size': 6}, numpoints=1) - - if n_iter != 4: - plt.tick_params(axis='x', which='both', bottom='off', - top='off', labelbottom='off') ############################################################################# # GP kernel @@ -121,7 +90,7 @@ def plot_optimizer(res, next_x, x, fx, n_iter, max_iters=5): f_val = objective(next_x) res = opt_gp.tell(next_x, f_val) if i >= 5: - plot_optimizer(res, opt_gp._next_x, x, fx, n_iter=i-5, max_iters=5) + plot_optimizer(res, opt_gp._next_x, n_iter=i-5, max_iters=5) plt.tight_layout(rect=[0, 0.03, 1, 0.95]) plt.plot() @@ -147,7 +116,6 @@ def plot_optimizer(res, next_x, x, fx, n_iter, max_iters=5): * (DotProduct(sigma_0=1.0, sigma_0_bounds=(0.1, 10.0)) ** 2), 1.0 * Matern(length_scale=1.0, length_scale_bounds=(1e-1, 10.0), nu=2.5)] - ############################################################################# for kernel in kernels: @@ -164,6 +132,6 @@ def plot_optimizer(res, next_x, x, fx, n_iter, max_iters=5): f_val = objective(next_x) res = opt.tell(next_x, f_val) if i >= 5: - plot_optimizer(res, opt._next_x, x, fx, n_iter=i - 5, max_iters=5) + plot_optimizer(res, opt._next_x, n_iter=i - 5, max_iters=5) plt.tight_layout(rect=[0, 0.03, 1, 0.95]) plt.show() diff --git a/skopt/optimizer/optimizer.py b/skopt/optimizer/optimizer.py index e485b525f..74bb7c25f 100644 --- a/skopt/optimizer/optimizer.py +++ 
b/skopt/optimizer/optimizer.py @@ -2,7 +2,8 @@ import warnings from math import log from numbers import Number - +import copy +import inspect import numpy as np from scipy.optimize import fmin_l_bfgs_b @@ -147,7 +148,8 @@ def __init__(self, dimensions, base_estimator="gp", model_queue_size=None, acq_func_kwargs=None, acq_optimizer_kwargs=None): - + self.specs = {"args": copy.copy(inspect.currentframe().f_locals), + "function": "Optimizer"} self.rng = check_random_state(random_state) # Configure acquisition function @@ -264,7 +266,6 @@ def __init__(self, dimensions, base_estimator="gp", self.yi = [] # Initialize cache for `ask` method responses - # This ensures that multiple calls to `ask` with n_points set # return same sets of points. Reset to {} at every call to `tell`. self.cache_ = {} @@ -291,7 +292,6 @@ def copy(self, random_state=None): if hasattr(self, "gains_"): optimizer.gains_ = np.copy(self.gains_) - if self.Xi: optimizer._tell(self.Xi, self.yi) @@ -568,8 +568,11 @@ def _tell(self, x, y, fit=True): next_x.reshape((1, -1)))[0] # Pack results - return create_result(self.Xi, self.yi, self.space, self.rng, - models=self.models) + result = create_result(self.Xi, self.yi, self.space, self.rng, + models=self.models) + + result.specs = self.specs + return result def _check_y_is_valid(self, x, y): """Check if the shape and types of x and y are consistent.""" @@ -602,8 +605,10 @@ def run(self, func, n_iter=1): x = self.ask() self.tell(x, func(x)) - return create_result(self.Xi, self.yi, self.space, self.rng, - models=self.models) + result = create_result(self.Xi, self.yi, self.space, self.rng, + models=self.models) + result.specs = self.specs + return result def update_next(self): """Updates the value returned by opt.ask(). Useful if a parameter @@ -625,5 +630,7 @@ def get_result(self): OptimizeResult instance with the required information. """ - return create_result(self.Xi, self.yi, self.space, self.rng, - models=self.models) + result = create_result(self.Xi, self.yi, self.space, self.rng, + models=self.models) + result.specs = self.specs + return result diff --git a/skopt/plots.py b/skopt/plots.py index 2879492d4..4876ed05b 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -5,6 +5,7 @@ from functools import partial from scipy.optimize import OptimizeResult +from .acquisition import _gaussian_acquisition from skopt import expected_minimum, expected_minimum_random_sampling from .space import Categorical @@ -103,6 +104,168 @@ def plot_convergence(*args, **kwargs): return ax +def plot_gaussian_process(res, **kwargs): + """Plots the optimization results and the gaussian process + for 1-D objective functions. + + Parameters + ---------- + res : `OptimizeResult` + The result for which to plot the gaussian process. + + ax : `Axes`, optional + The matplotlib axes on which to draw the plot, or `None` to create + a new one. + + n_calls : int, default=-1 + Can be used to evaluate the model at call `n_calls`. + + objective : func, default=None + Defines the true objective function. Must have one input parameter. + + noise_level : float, default=0 + Sets the estimated noise level + + show_legend : boolean, default=True + When True, a legend is plotted. 
+
+    show_title : boolean, default=True
+        When True, a title containing the found minimum value
+        is shown.
+
+    show_acq_func : boolean, default=False
+        When True, the acquisition function is plotted.
+
+    show_next_point : boolean, default=False
+        When True, the next evaluated point is plotted.
+
+    next_x : float, default=None
+        The next evaluated point can also be defined.
+
+    show_observations : boolean, default=True
+        When True, observations are plotted as dots.
+
+    show_mu : boolean, default=True
+        When True, the predicted model is shown.
+
+    Returns
+    -------
+    ax : `Axes`
+        The matplotlib axes.
+    """
+    ax = kwargs.get("ax", None)
+    n_calls = kwargs.get("n_calls", -1)
+    objective = kwargs.get("objective", None)
+    noise_level = kwargs.get("noise_level", 0)
+    show_legend = kwargs.get("show_legend", True)
+    show_title = kwargs.get("show_title", True)
+    show_acq_func = kwargs.get("show_acq_func", False)
+    show_next_point = kwargs.get("show_next_point", False)
+    next_x = kwargs.get("next_x", None)
+    show_observations = kwargs.get("show_observations", True)
+    show_mu = kwargs.get("show_mu", True)
+    acq_func = kwargs.get("acq_func", None)
+    n_random = kwargs.get("n_random", None)
+    acq_func_kwargs = kwargs.get("acq_func_kwargs", None)
+
+    if ax is None:
+        ax = plt.gca()
+    bounds = res.space.dimensions[0].bounds
+    x = np.linspace(bounds[0], bounds[1], 400).reshape(-1, 1)
+    x_gp = res.space.transform(x.tolist())
+    if res.specs is not None and "args" in res.specs:
+        if n_random is None:
+            n_random = res.specs["args"].get('n_random_starts', n_random)
+        if acq_func is None:
+            acq_func = res.specs["args"].get("acq_func", "EI")
+        if acq_func_kwargs is None:
+            acq_func_kwargs = res.specs["args"].get("acq_func_kwargs", {})
+
+    if acq_func_kwargs is None:
+        acq_func_kwargs = {}
+    if acq_func is None or acq_func == "gp_hedge":
+        acq_func = "EI"
+    if n_random is None:
+        n_random = len(res.x_iters) - len(res.models)
+
+    if objective is not None:
+        fx = np.array([objective(x_i) for x_i in x])
+    if n_calls < 0:
+        gp = res.models[-1]
+        curr_x_iters = res.x_iters
+        curr_func_vals = res.func_vals
+    else:
+        gp = res.models[n_calls]
+
+        curr_x_iters = res.x_iters[:n_random + n_calls]
+        curr_func_vals = res.func_vals[:n_random + n_calls]
+
+    # Plot true function.
+ if objective is not None: + ax.plot(x, fx, "r--", label="True (unknown)") + ax.fill(np.concatenate([x, x[::-1]]), + np.concatenate(([fx_i - 1.9600 * noise_level for fx_i in fx], + [fx_i + 1.9600 * noise_level for fx_i in fx[::-1]])), + alpha=.2, fc="r", ec="None") + + # Plot GP(x) + contours + y_pred, sigma = gp.predict(x_gp, return_std=True) + if show_mu: + ax.plot(x, y_pred, "g--", label=r"$\mu_{GP}(x)$") + ax.fill(np.concatenate([x, x[::-1]]), + np.concatenate([y_pred - 1.9600 * sigma, + (y_pred + 1.9600 * sigma)[::-1]]), + alpha=.2, fc="g", ec="None") + + # Plot sampled points + if show_observations: + ax.plot(curr_x_iters, curr_func_vals, + "r.", markersize=8, label="Observations") + if (show_mu or show_observations or objective is not None) and show_acq_func: + ax_ei = ax.twinx() + ax_ei.set_ylabel(str(acq_func) + "(x)") + plot_both = True + else: + ax_ei = ax + plot_both = False + if show_acq_func: + acq = _gaussian_acquisition(x_gp, gp, y_opt=np.min(curr_func_vals), + acq_func=acq_func, acq_func_kwargs=acq_func_kwargs) + if acq_func in ["EI", "PI", "EIps", "PIps"]: + acq = - acq + ax_ei.plot(x, acq, "b", label=str(acq_func) + "(x)") + if not plot_both: + ax_ei.fill_between(x.ravel(), 0, acq.ravel(), alpha=0.3, color='blue') + if next_x is None and n_calls >= 0: + next_x = res.x_iters[n_random + n_calls] + if next_x is not None: + next_acq = _gaussian_acquisition(res.space.transform([next_x]), gp, + y_opt=np.min(curr_func_vals), + acq_func=acq_func, + acq_func_kwargs=acq_func_kwargs) + if acq_func in ["EI", "PI", "EIps", "PIps"]: + next_acq = -next_acq + if show_next_point and next_x is not None: + ax_ei.plot(next_x, next_acq, "bo", markersize=6, label="Next query point") + + if show_title: + ax.set_title(r"x* = %.4f, f(x*) = %.4f" % (res.x[0], res.fun)) + # Adjust plot layout + ax.grid() + ax.set_xlabel("x") + ax.set_ylabel("f(x)") + if show_legend: + if plot_both: + lines, labels = ax.get_legend_handles_labels() + lines2, labels2 = ax_ei.get_legend_handles_labels() + ax_ei.legend(lines + lines2, labels + labels2, loc="best", prop={'size': 6}, numpoints=1) + else: + ax.legend(loc="best", prop={'size': 6}, numpoints=1) + + return ax + + + def plot_regret(*args, **kwargs): """Plot one or several cumulative regret traces. 
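To make the new plotting API concrete, a minimal usage sketch (not part of the
patch above; the toy 1-D objective and the random_state are stand-ins, and
plot_gaussian_process only supports one-dimensional search-spaces):

import numpy as np
import matplotlib.pyplot as plt
from skopt import gp_minimize
from skopt.plots import plot_gaussian_process

def f(x):
    # Toy 1-D objective standing in for any expensive black-box function.
    return np.sin(5 * x[0]) * (1 - np.tanh(x[0] ** 2))

res = gp_minimize(f, [(-2.0, 2.0)], n_calls=15, random_state=0)

# Plot the surrogate's mean and uncertainty band, with the acquisition
# function drawn on a twin axis.
ax = plot_gaussian_process(res, objective=f, noise_level=0.0,
                           show_acq_func=True)
plt.show()
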
From 166a69b824183d8e07083f2193cf61c478aefbf1 Mon Sep 17 00:00:00 2001
From: holgern
Date: Fri, 14 Feb 2020 15:45:33 +0100
Subject: [PATCH 066/265] Refactor code and remove utils.py in samples

---
 skopt/samples/base.py | 4 +
 skopt/samples/halton.py | 42 +++++++-
 skopt/samples/hammersly.py | 2 +-
 skopt/samples/lhs.py | 101 ++++++++++++++++--
 skopt/samples/sobol.py | 69 ++++++++++++-
 skopt/samples/utils.py | 198 ------------------------------------
 skopt/tests/test_samples.py | 17 ++--
 skopt/utils.py | 12 ++-
 8 files changed, 219 insertions(+), 226 deletions(-)
 create mode 100644 skopt/samples/base.py
 delete mode 100644 skopt/samples/utils.py

diff --git a/skopt/samples/base.py b/skopt/samples/base.py
new file mode 100644
index 000000000..0bc23d8d1
--- /dev/null
+++ b/skopt/samples/base.py
@@ -0,0 +1,4 @@
+
+class InitialPointGenerator(object):
+    def generate(self, n_dim, n_samples, random_state=None):
+        raise NotImplementedError
diff --git a/skopt/samples/halton.py b/skopt/samples/halton.py
index 7cfc43edd..741e720a8 100644
--- a/skopt/samples/halton.py
+++ b/skopt/samples/halton.py
@@ -3,8 +3,7 @@
 distributions/sampler/sequences/halton.py
 """
 import numpy as np
-from .utils import create_primes
-from .utils import InitialPointGenerator
+from .base import InitialPointGenerator
 
 
 class Halton(InitialPointGenerator):
@@ -53,7 +52,7 @@ def generate(self, n_dim, n_samples, random_state=None):
         if not primes:
             prime_order = 10 * n_dim
             while len(primes) < n_dim:
-                primes = create_primes(prime_order)
+                primes = _create_primes(prime_order)
                 prime_order *= 2
             primes = primes[:n_dim]
             assert len(primes) == n_dim, "not enough primes"
@@ -110,3 +109,40 @@ def _van_der_corput_samples(idx, number_base=2):
         base *= number_base
         active = idx > 0
     return out
+
+
+def _create_primes(threshold):
+    """
+    Generate prime values using sieve of Eratosthenes method.
+
+    Parameters
+    ----------
+    threshold : int
+        The upper bound for the size of the prime values.
+
+    Returns
+    -------
+    List
+        All primes from 2 and up to ``threshold``.
+ """ + if threshold == 2: + return [2] + + elif threshold < 2: + return [] + + numbers = list(range(3, threshold+1, 2)) + root_of_threshold = threshold ** 0.5 + half = int((threshold+1)/2-1) + idx = 0 + counter = 3 + while counter <= root_of_threshold: + if numbers[idx]: + idy = int((counter*counter-3)/2) + numbers[idy] = 0 + while idy < half: + numbers[idy] = 0 + idy += counter + idx += 1 + counter = 2*idx+3 + return [2] + [number for number in numbers if number] diff --git a/skopt/samples/hammersly.py b/skopt/samples/hammersly.py index 96329e840..6fc5efa45 100644 --- a/skopt/samples/hammersly.py +++ b/skopt/samples/hammersly.py @@ -4,7 +4,7 @@ """ import numpy as np from .halton import Halton -from .utils import InitialPointGenerator +from .base import InitialPointGenerator class Hammersly(InitialPointGenerator): diff --git a/skopt/samples/lhs.py b/skopt/samples/lhs.py index f0bb008cf..e9d2a0d3e 100644 --- a/skopt/samples/lhs.py +++ b/skopt/samples/lhs.py @@ -8,11 +8,11 @@ gsa_module/samples/lhs_opt.py (Damar Wicaksono) """ import numpy as np +import math from sklearn.utils import check_random_state from scipy import spatial -from .utils import random_permute_matrix -from .utils import InitialPointGenerator -from .utils import w2_discrepancy_fast, calc_max_inner, calc_num_candidate +from ..utils import random_permute_matrix +from .base import InitialPointGenerator class Lhs(InitialPointGenerator): @@ -139,24 +139,24 @@ def generate(self, n_dim, n_samples, random_state=None): random_state=random_state) if self.ese_threshold_init <= 0.0: - threshold = 0.005 * w2_discrepancy_fast(dm_init) + threshold = 0.005 * _w2_discrepancy_fast(dm_init) else: threshold = self.ese_threshold_init if self.ese_num_exchanges <= 0: # number of exchanges - num_exchanges = calc_num_candidate(n_samples) + num_exchanges = _calc_num_candidate(n_samples) else: num_exchanges = self.ese_num_exchanges # maximum number of inner iterations if self.ese_max_inner <= 0: - max_inner = calc_max_inner(n_samples, n_dim) + max_inner = _calc_max_inner(n_samples, n_dim) else: max_inner = self.ese_max_inner dm = dm_init.copy() # the current design # the best value of obj.func. so far - obj_func_best = w2_discrepancy_fast(dm) + obj_func_best = _w2_discrepancy_fast(dm) # the old value of obj.func. 
- obj_func_best_old = w2_discrepancy_fast(dm) + obj_func_best_old = _w2_discrepancy_fast(dm) flag_explore = False # improved flag best_evol = [] # Keep track the best solution @@ -170,7 +170,7 @@ def generate(self, n_dim, n_samples, random_state=None): # Begin Inner Iteration for inner in range(max_inner): - obj_func = w2_discrepancy_fast(dm) + obj_func = _w2_discrepancy_fast(dm) # Perturb current design num_dimension = inner % n_dim import itertools @@ -195,14 +195,14 @@ def generate(self, n_dim, n_samples, random_state=None): pairs[i][1], num_dimension] dm_try[pairs[i][1], num_dimension] = dm[ pairs[i][0], num_dimension] - obj_func_try = w2_discrepancy_fast(dm_try) + obj_func_try = _w2_discrepancy_fast(dm_try) if obj_func_try < obj_func_current: # Select the best trial from all the # perturbation trials obj_func_current = obj_func_try dm_current = dm_try.copy() - obj_func_try = w2_discrepancy_fast(dm_current) + obj_func_try = _w2_discrepancy_fast(dm_current) # Check whether solution is acceptable if (obj_func_try - obj_func) <=\ threshold * rng.rand(): @@ -271,3 +271,82 @@ def generate(self, n_dim, n_samples, random_state=None): threshold *= self.ese_exploring_params[2] return h_opt + + +def _calc_num_candidate(n): + """Calculate the number of candidates from perturbing the current design + Recommended in the article is the maximum number of pair combination + from a given column divided by a factor of 5. + It is also recommended that the number of candidates to be evaluated does + not exceed 50 + + Parameters + ---------- + n : int + the number of elements to be permuted + Returns + ------- + the number of candidates from perturbing the current design + column-wise + """ + pairs = math.factorial(n) / math.factorial(n - 2) / math.factorial(2) + fac = 5 # The factor recommended in the article + + return min(int(pairs / fac), 50) + + +def _calc_max_inner(n, k): + """Calculate the maximum number of inner iterations + :math:`\frac{2 \times n_e \times k}{J}` + It is recommended that the number of inner iterations does not exceed 100 + Parameters + ---------- + n : int + the number of samples in the design + k : int + the number of design dimension + Returns + ------- + the maximum number of inner iterations/loop + """ + pairs = math.factorial(n) / math.factorial(n - 2) / math.factorial(2) + + return min(int(2 * pairs * k / _calc_num_candidate(n)), 100) + + +def _w2_discrepancy_fast(D): + """The vectorized version of wrap-around L2-discrepancy + calculation, faster! 
+    The formula for the Wrap-Around L2-Discrepancy is taken from Eq.5 of (1):
+    :math:`WD^2(D) = -(4/3)^K + \frac{1}{N^2} \sum_{i,j=1}^{N} \
+    \prod_{k=1}^{K} [3/2 - |x_k^i - x_k^j| (1 - |x_k^i - x_k^j|)]`
+    The implementation below uses vectorized numpy array operations to
+    avoid the nested loop of the more straightforward implementation.
+
+    Parameters
+    ----------
+    D : np.array
+        the design matrix
+
+    Returns
+    -------
+    the wrap-around L2-discrepancy
+    """
+
+    n = D.shape[0]  # the number of samples
+    k = D.shape[1]  # the number of dimensions
+    delta = [None] * k
+    for i in range(k):
+        # loop over dimensions to calculate the absolute difference
+        # between points in a given dimension,
+        # note the vectorized operation
+        delta[i] = np.abs(D[:, i] - np.reshape(D[:, i], (len(D[:, i]), 1)))
+
+    product = 1.5 - delta[0] * (1 - delta[0])
+    for i in range(1, k):
+        product *= (1.5 - delta[i] * (1 - delta[i]))
+
+    w2_disc = -1 * (4.0/3.0)**k + 1/n**2 * np.sum(product)
+
+    return w2_disc
diff --git a/skopt/samples/sobol.py b/skopt/samples/sobol.py
index 242031d5b..ed52e16d1 100644
--- a/skopt/samples/sobol.py
+++ b/skopt/samples/sobol.py
@@ -18,8 +18,7 @@
 from __future__ import division
 import numpy as np
 from scipy.stats import norm
-from .utils import InitialPointGenerator
-from .utils import _bit_lo0, _bit_hi1, random_shift
+from .base import InitialPointGenerator
 from sklearn.utils import check_random_state
 
 
@@ -214,7 +213,7 @@ def generate(self, n_dim, n_samples, random_state=None):
         for j in range(n_samples):
             r[j, 0:n_dim], seed = self._sobol(n_dim, seed)
         if self.randomize:
-            return random_shift(r, random_state)
+            return _random_shift(r, random_state)
         return r
 
     def _sobol(self, dim_num, seed):
@@ -302,3 +301,67 @@ def _sobol(self, dim_num, seed):
             seed += 1
         return [quasi, seed]
+
+
+def _bit_hi1(n):
+    """
+    Returns the position of the high 1 bit base 2 in an integer.
+
+    Parameters
+    ----------
+    n : int
+        input, should be positive
+    """
+    bin_repr = np.binary_repr(n)
+    most_left_one = bin_repr.find('1')
+    if most_left_one == -1:
+        return 0
+    else:
+        return len(bin_repr) - most_left_one
+
+
+def _bit_lo0(n):
+    """
+    Returns the position of the low 0 bit base 2 in an integer.
+
+    Parameters
+    ----------
+    n : int
+        input, should be positive
+
+    """
+    bin_repr = np.binary_repr(n)
+    most_right_zero = bin_repr[::-1].find('0')
+    if most_right_zero == -1:
+        most_right_zero = len(bin_repr)
+    return most_right_zero + 1
+
+
+def _random_shift(dm, random_state=None):
+    """Random shifting of a vector
+    Randomization of the quasi-MC samples can be achieved
+    in the easiest manner by
+    random shift (or the Cranley-Patterson rotation).
+
+    References
+    ----------
+    C. Lemieux, "Monte Carlo and Quasi-Monte Carlo Sampling," Springer
+    Series in Statistics 692, Springer Science+Business Media, New York,
+    2009
+
+    Parameters
+    ----------
+    dm : array, shape(n,d)
+        input matrix
+    random_state : int, RandomState instance, or None (default)
+        Set random state to something other than None for reproducible
+        results.
+ + Returns + ------- + Randomized Sobol' design matrix + """ + rng = check_random_state(random_state) + # Generate random shift matrix from uniform distribution + shift = np.repeat(rng.rand(1, dm.shape[1]), dm.shape[0], axis=0) + # Return the shifted Sobol' design + return (dm + shift) % 1 diff --git a/skopt/samples/utils.py b/skopt/samples/utils.py deleted file mode 100644 index 28629ab72..000000000 --- a/skopt/samples/utils.py +++ /dev/null @@ -1,198 +0,0 @@ -import numpy as np -from sklearn.utils import check_random_state -import math - - -def create_primes(threshold): - """ - Generate prime values using sieve of Eratosthenes method. - - Parameters - ---------- - threshold : int - The upper bound for the size of the prime values. - - Returns - ------ - List - All primes from 2 and up to ``threshold``. - """ - if threshold == 2: - return [2] - - elif threshold < 2: - return [] - - numbers = list(range(3, threshold+1, 2)) - root_of_threshold = threshold ** 0.5 - half = int((threshold+1)/2-1) - idx = 0 - counter = 3 - while counter <= root_of_threshold: - if numbers[idx]: - idy = int((counter*counter-3)/2) - numbers[idy] = 0 - while idy < half: - numbers[idy] = 0 - idy += counter - idx += 1 - counter = 2*idx+3 - return [2] + [number for number in numbers if number] - - -def w2_discrepancy_fast(D): - """The vectorized version of wrap-around L2-discrepancy - calculation, faster! - The formula for the Wrap-Around L2-Discrepancy is taken from Eq.5 of (1) - :math:`WD^2(D) = -(4/3)^K + 1/N^2 \Sigma_{i,j=1}^{N} \ - Pi_{k=1}^K [3/2 - |x_k^1 - x_k^2| * (1 - |x_k^1 - x_k^2|)]` - The implementation below uses a vector operation of numpy array to - avoid the - nested loop in the more straightforward implementation - - Parameters - ---------- - D : np.array - the design matrix - - Returns - ------- - the wrap-around L2-discrepancy - """ - - n = D.shape[0] # the number of samples - k = D.shape[1] # the number of dimension - delta = [None] * k - for i in range(k): - # loop over dimension to calculate the absolute difference - # between point - # in a given dimension, note the vectorized operation - delta[i] = np.abs(D[:, i] - np.reshape(D[:, i], (len(D[:, i]), 1))) - - product = 1.5 - delta[0] * (1 - delta[0]) - for i in range(1, k): - product *= (1.5 - delta[i] * (1 - delta[i])) - - w2_disc = -1 * (4.0/3.0)**k + 1/n**2 * np.sum(product) - - return w2_disc - - -def random_permute_matrix(h, random_state=None): - rng = check_random_state(random_state) - h_rand_perm = np.zeros_like(h) - samples, n = h.shape - for j in range(n): - order = rng.permutation(range(samples)) - h_rand_perm[:, j] = h[order, j] - return h_rand_perm - - -def _bit_hi1(n): - """ - Returns the position of the high 1 bit base 2 in an integer. - - Parameters - ---------- - n : int - input, should be positive - """ - bin_repr = np.binary_repr(n) - most_left_one = bin_repr.find('1') - if most_left_one == -1: - return 0 - else: - return len(bin_repr) - most_left_one - - -def _bit_lo0(n): - """ - Returns the position of the low 0 bit base 2 in an integer. - - Parameters - ---------- - n : int - input, should be positive - - """ - bin_repr = np.binary_repr(n) - most_right_zero = bin_repr[::-1].find('0') - if most_right_zero == -1: - most_right_zero = len(bin_repr) - return most_right_zero + 1 - - -def random_shift(dm, random_state=None): - """Random shifting of a vector - Randomization of the quasi-MC samples can be achieved - in the easiest manner by - random shift (or the Cranley-Patterson rotation). - Refereences - ----------- - C. 
Lemieux, "Monte Carlo and Quasi-Monte Carlo Sampling," Springer - Series in Statistics 692, Springer Science+Business Media, New York, - 2009 - - Parameters - ---------- - dm : array, shape(n,d) - input matrix - random_state : int, RandomState instance, or None (default) - Set random state to something other than None for reproducible - results. - - Returns - ------- - Randomized Sobol' design matrix - """ - rng = check_random_state(random_state) - # Generate random shift matrix from uniform distribution - shift = np.repeat(rng.rand(1, dm.shape[1]), dm.shape[0], axis=0) - # Return the shifted Sobol' design - return (dm + shift) % 1 - - -def calc_num_candidate(n): - """Calculate the number of candidates from perturbing the current design - Recommended in the article is the maximum number of pair combination - from a given column divided by a factor of 5. - It is also recommended that the number of candidates to be evaluated does - not exceed 50 - - Parameters - ---------- - n : int - the number of elements to be permuted - Returns - ------- - the number of candidates from perturbing the current design - column-wise - """ - pairs = math.factorial(n) / math.factorial(n - 2) / math.factorial(2) - fac = 5 # The factor recommended in the article - - return min(int(pairs / fac), 50) - - -def calc_max_inner(n, k): - """Calculate the maximum number of inner iterations - :math:`\frac{2 \times n_e \times k}{J}` - It is recommended that the number of inner iterations does not exceed 100 - Parameters - ---------- - n : int - the number of samples in the design - k : int - the number of design dimension - Returns - ------- - the maximum number of inner iterations/loop - """ - pairs = math.factorial(n) / math.factorial(n - 2) / math.factorial(2) - - return min(int(2 * pairs * k / calc_num_candidate(n)), 100) - - -class InitialPointGenerator(object): - def generate(self, n_dim, n_samples, random_state=None): - raise NotImplemented diff --git a/skopt/tests/test_samples.py b/skopt/tests/test_samples.py index db1da7ebb..5cbb15f93 100644 --- a/skopt/tests/test_samples.py +++ b/skopt/tests/test_samples.py @@ -17,11 +17,10 @@ from skopt.space import Integer from skopt.space import Categorical from skopt.space import check_dimension as space_check_dimension -from skopt.samples.sobol import Sobol -from skopt.samples.utils import _bit_lo0, _bit_hi1, create_primes -from skopt.samples.halton import _van_der_corput_samples -from skopt.samples import Hammersly, Halton -from skopt.samples.lhs import Lhs +from skopt.samples.sobol import _bit_lo0, _bit_hi1 +from skopt.samples.halton import _van_der_corput_samples, _create_primes +from skopt.samples import Hammersly, Halton, Lhs, Sobol +from skopt.samples.lhs import Lhs, _w2_discrepancy_fast @pytest.mark.fast_test @@ -132,11 +131,11 @@ def test_hammersly(): @pytest.mark.fast_test def test_primes(): - x = create_primes(1) + x = _create_primes(1) assert_array_equal(x, []) - x = create_primes(2) + x = _create_primes(2) assert_array_equal(x, [2]) - x = create_primes(3) + x = _create_primes(3) assert_array_equal(x, [2, 3]) - x = create_primes(20) + x = _create_primes(20) assert_array_equal(x, [2, 3, 5, 7, 11, 13, 17, 19]) diff --git a/skopt/utils.py b/skopt/utils.py index 83d5e27ab..395413acf 100644 --- a/skopt/utils.py +++ b/skopt/utils.py @@ -1,6 +1,6 @@ from copy import deepcopy from functools import wraps - +from sklearn.utils import check_random_state import numpy as np from scipy.optimize import OptimizeResult from scipy.optimize import minimize as sp_minimize @@ -768,3 
From 9b7004cbbf674beb5cc486cf524e07dc8f7324a3 Mon Sep 17 00:00:00 2001
From: holgern
Date: Sat, 15 Feb 2020 12:59:41 +0100
Subject: [PATCH 067/265] Remove lhs_ese and
 * optimized LHS using the Enhanced Stochastic Evolutionary Alg. is
   removed, as it is slow and does not perform well
 * LHS default is changed to maximin
 * Optimizer setting simplified; init_point_gen_kwargs has been added
 * Hammersly and Halton now have min_skip and max_skip parameters,
   similar to Sobol

---
 .../initial-sampling-method-integer.py        |  11 -
 examples/samples/initial-sampling-method.py   |  11 -
 skopt/optimizer/base.py                       |  24 +-
 skopt/optimizer/dummy.py                      |  23 +-
 skopt/optimizer/forest.py                     |  24 +-
 skopt/optimizer/gbrt.py                       |  24 +-
 skopt/optimizer/gp.py                         |  24 +-
 skopt/optimizer/optimizer.py                  |  63 ++--
 skopt/samples/halton.py                       |  39 ++-
 skopt/samples/hammersly.py                    |  22 +-
 skopt/samples/lhs.py                          | 273 +-----------------
 skopt/tests/test_samples.py                   |   5 -
 12 files changed, 108 insertions(+), 435 deletions(-)

diff --git a/examples/samples/initial-sampling-method-integer.py b/examples/samples/initial-sampling-method-integer.py
index 4fb3e5264..e1d5946af 100644
--- a/examples/samples/initial-sampling-method-integer.py
+++ b/examples/samples/initial-sampling-method-integer.py
@@ -129,17 +129,6 @@ def plot_branin(x, title):
 pdist_data.append(pdist(x).flatten())
 x_label.append("ratio")

-#############################################################################
-# ESE optimized hypercube sampling
-# --------------------------------
-
-lhs = Lhs(criterion="ese", iterations=100)
-inv_initial_samples = lhs.generate(n_dim, n_samples)
-x = space.inverse_transform(inv_initial_samples)
-plot_branin(x, 'ese LHS')
-pdist_data.append(pdist(x).flatten())
-x_label.append("ese")
-
 #############################################################################
 # Halton sampling
 # ---------------
diff --git a/examples/samples/initial-sampling-method.py b/examples/samples/initial-sampling-method.py
index 30cedb7df..b7fa883a8 100644
--- a/examples/samples/initial-sampling-method.py
+++ b/examples/samples/initial-sampling-method.py
@@ -129,17 +129,6 @@ def plot_branin(x, title):
 pdist_data.append(pdist(x).flatten())
 x_label.append("ratio")

-#############################################################################
-# ESE optimized hypercube sampling
-# --------------------------------
-
-lhs = Lhs(criterion="ese", iterations=10)
-inv_initial_samples = lhs.generate(n_dim, n_samples)
-x = space.inverse_transform(inv_initial_samples)
-plot_branin(x, 'ese LHS')
-pdist_data.append(pdist(x).flatten())
-x_label.append("ese")
-
 #############################################################################
 # Halton sampling
 # ---------------
diff --git a/skopt/optimizer/base.py b/skopt/optimizer/base.py
index 0739755c5..3f751259c 100644
--- a/skopt/optimizer/base.py
+++ b/skopt/optimizer/base.py
@@ -26,7 +26,8 @@ def base_minimize(func, dimensions, base_estimator,
                   acq_func="EI", acq_optimizer="lbfgs",
                   x0=None, y0=None, random_state=None, verbose=False,
                   callback=None, n_points=10000, n_restarts_optimizer=5,
-                  xi=0.01, kappa=1.96, n_jobs=1, model_queue_size=None):
+                  xi=0.01, kappa=1.96, n_jobs=1, model_queue_size=None,
+                  init_point_gen_kwargs=None):
"""Base optimizer class Parameters ---------- @@ -82,21 +83,6 @@ def base_minimize(func, dimensions, base_estimator, - "lhs" for a latin hypercube sequence, - - "lhs_center" for a centered LHS sequence, - - - "lhs_maximin" for a LHS sequence which is maximized regarding - the minimum distance of all points to each other - - - "lhs_ratio" for a LHS sequence which is maximized regarding - the ratio between the maximum to the minimum distance of all - points to each other - - - "lhs_correlation" for a LHS sequence which is minimized - regarding the correlation coefficients - - - "lhs_ese" for a LHS sequence which is optimized by an enhanced - stochastic evolutionary (ESE) algorithm - acq_func : string, default=`"EI"` Function to minimize over the posterior distribution. Can be either @@ -193,6 +179,9 @@ def base_minimize(func, dimensions, base_estimator, Keeps list of models only as long as the argument given. In the case of None, the list has no capped length. + init_point_gen_kwargs : dict + Additional arguments to be passed to the initial_point_generator + Returns ------- res : `OptimizeResult`, scipy object @@ -263,7 +252,8 @@ def base_minimize(func, dimensions, base_estimator, random_state=random_state, model_queue_size=model_queue_size, acq_optimizer_kwargs=acq_optimizer_kwargs, - acq_func_kwargs=acq_func_kwargs) + acq_func_kwargs=acq_func_kwargs, + init_point_gen_kwargs=init_point_gen_kwargs) # check x0: element-wise data type, dimensionality assert all(isinstance(p, Iterable) for p in x0) if not all(len(p) == optimizer.space.n_dims for p in x0): diff --git a/skopt/optimizer/dummy.py b/skopt/optimizer/dummy.py index c6ecb211c..f17837c10 100644 --- a/skopt/optimizer/dummy.py +++ b/skopt/optimizer/dummy.py @@ -6,7 +6,7 @@ def dummy_minimize(func, dimensions, n_calls=100, initial_point_generator="random", x0=None, y0=None, random_state=None, verbose=False, callback=None, - model_queue_size=None): + model_queue_size=None, init_point_gen_kwargs=None): """Random search by uniform sampling within the given bounds. Parameters @@ -49,21 +49,6 @@ def dummy_minimize(func, dimensions, n_calls=100, - "lhs" for a latin hypercube sequence, - - "lhs_center" for a centered LHS sequence, - - - "lhs_maximin" for a LHS sequence which is maximized regarding - the minimum distance of all points to each other - - - "lhs_ratio" for a LHS sequence which is maximized regarding - the ratio between the maximum to the minimum distance of all - points to each other - - - "lhs_correlation" for a LHS sequence which is minimized - regarding the correlation coefficients - - - "lhs_ese" for a LHS sequence which is optimized by an enhanced - stochastic evolutionary (ESE) algorithm - x0 : list, list of lists or `None` Initial input points. @@ -98,6 +83,9 @@ def dummy_minimize(func, dimensions, n_calls=100, Keeps list of models only as long as the argument given. In the case of None, the list has no capped length. 
+    init_point_gen_kwargs : dict
+        Additional arguments to be passed to the initial_point_generator
+
     Returns
     -------
     res : `OptimizeResult`, scipy object
@@ -138,4 +126,5 @@ def dummy_minimize(func, dimensions, n_calls=100,
                    initial_point_generator=initial_point_generator,
                    x0=x0, y0=y0, random_state=random_state,
                    verbose=verbose,
-                   callback=callback, model_queue_size=model_queue_size)
+                   callback=callback, model_queue_size=model_queue_size,
+                   init_point_gen_kwargs=init_point_gen_kwargs)
diff --git a/skopt/optimizer/forest.py b/skopt/optimizer/forest.py
index 98ee3f0be..a44b42cbd 100644
--- a/skopt/optimizer/forest.py
+++ b/skopt/optimizer/forest.py
@@ -12,7 +12,8 @@ def forest_minimize(func, dimensions, base_estimator="ET", n_calls=100,
                     initial_point_generator="random",
                     x0=None, y0=None, random_state=None, verbose=False,
                     callback=None, n_points=10000, xi=0.01, kappa=1.96,
-                    n_jobs=1, model_queue_size=None):
+                    n_jobs=1, model_queue_size=None,
+                    init_point_gen_kwargs=None):
     """Sequential optimisation using decision trees.

     A tree based regression model is used to model the expensive to evaluate
@@ -90,21 +91,6 @@ def forest_minimize(func, dimensions, base_estimator="ET", n_calls=100,

         - "lhs" for a latin hypercube sequence,

-        - "lhs_center" for a centered LHS sequence,
-
-        - "lhs_maximin" for a LHS sequence which is maximized regarding
-          the minimum distance of all points to each other
-
-        - "lhs_ratio" for a LHS sequence which is maximized regarding
-          the ratio between the maximum to the minimum distance of all
-          points to each other
-
-        - "lhs_correlation" for a LHS sequence which is minimized
-          regarding the correlation coefficients
-
-        - "lhs_ese" for a LHS sequence which is optimized by an enhanced
-          stochastic evolutionary (ESE) algorithm
-
     acq_func : string, default="LCB"
         Function to minimize over the forest posterior. Can be either
@@ -169,6 +155,9 @@ def forest_minimize(func, dimensions, base_estimator="ET", n_calls=100,
         Keeps list of models only as long as the argument given. In the
         case of None, the list has no capped length.

+    init_point_gen_kwargs : dict
+        Additional arguments to be passed to the initial_point_generator
+
     Returns
     -------
     res : `OptimizeResult`, scipy object
@@ -204,4 +193,5 @@ def forest_minimize(func, dimensions, base_estimator="ET", n_calls=100,
                          acq_func=acq_func,
                          xi=xi, kappa=kappa, verbose=verbose,
                          callback=callback, acq_optimizer="sampling",
-                         model_queue_size=model_queue_size)
+                         model_queue_size=model_queue_size,
+                         init_point_gen_kwargs=init_point_gen_kwargs)
diff --git a/skopt/optimizer/gbrt.py b/skopt/optimizer/gbrt.py
index 0f732b64f..b0d9c11f7 100644
--- a/skopt/optimizer/gbrt.py
+++ b/skopt/optimizer/gbrt.py
@@ -11,7 +11,8 @@ def gbrt_minimize(func, dimensions, base_estimator=None,
                   acq_func="EI", acq_optimizer="auto",
                   x0=None, y0=None, random_state=None, verbose=False,
                   callback=None, n_points=10000, xi=0.01, kappa=1.96,
-                  n_jobs=1, model_queue_size=None):
+                  n_jobs=1, model_queue_size=None,
+                  init_point_gen_kwargs=None):
     """Sequential optimization using gradient boosted trees.
     Gradient boosted regression trees are used to model the (very)
@@ -76,21 +77,6 @@ def gbrt_minimize(func, dimensions, base_estimator=None,

         - "lhs" for a latin hypercube sequence,

-        - "lhs_center" for a centered LHS sequence,
-
-        - "lhs_maximin" for a LHS sequence which is maximized regarding
-          the minimum distance of all points to each other
-
-        - "lhs_ratio" for a LHS sequence which is maximized regarding
-          the ratio between the maximum to the minimum distance of all
-          points to each other
-
-        - "lhs_correlation" for a LHS sequence which is minimized
-          regarding the correlation coefficients
-
-        - "lhs_ese" for a LHS sequence which is optimized by an enhanced
-          stochastic evolutionary (ESE) algorithm
-
     acq_func : string, default=`"LCB"`
         Function to minimize over the forest posterior. Can be either
@@ -153,6 +139,9 @@ def gbrt_minimize(func, dimensions, base_estimator=None,
         Keeps list of models only as long as the argument given. In the
         case of None, the list has no capped length.

+    init_point_gen_kwargs : dict
+        Additional arguments to be passed to the initial_point_generator
+
     Returns
     -------
     res : `OptimizeResult`, scipy object
@@ -193,4 +182,5 @@ def gbrt_minimize(func, dimensions, base_estimator=None,
                          x0=x0, y0=y0, random_state=random_state, xi=xi,
                          kappa=kappa, acq_func=acq_func, verbose=verbose,
                          callback=callback, acq_optimizer="sampling",
-                         model_queue_size=model_queue_size)
+                         model_queue_size=model_queue_size,
+                         init_point_gen_kwargs=init_point_gen_kwargs)
diff --git a/skopt/optimizer/gp.py b/skopt/optimizer/gp.py
index a57c67137..1bc37c0e3 100644
--- a/skopt/optimizer/gp.py
+++ b/skopt/optimizer/gp.py
@@ -15,7 +15,8 @@ def gp_minimize(func, dimensions, base_estimator=None,
                 acq_func="gp_hedge", acq_optimizer="auto", x0=None, y0=None,
                 random_state=None, verbose=False, callback=None,
                 n_points=10000, n_restarts_optimizer=5, xi=0.01, kappa=1.96,
-                noise="gaussian", n_jobs=1, model_queue_size=None):
+                noise="gaussian", n_jobs=1, model_queue_size=None,
+                init_point_gen_kwargs=None):
     """Bayesian optimization using Gaussian Processes.

     If every function evaluation is expensive, for instance
@@ -102,21 +103,6 @@ def gp_minimize(func, dimensions, base_estimator=None,

         - "lhs" for a latin hypercube sequence,

-        - "lhs_center" for a centered LHS sequence,
-
-        - "lhs_maximin" for a LHS sequence which is maximized regarding
-          the minimum distance of all points to each other
-
-        - "lhs_ratio" for a LHS sequence which is maximized regarding
-          the ratio between the maximum to the minimum distance of all
-          points to each other
-
-        - "lhs_correlation" for a LHS sequence which is minimized
-          regarding the correlation coefficients
-
-        - "lhs_ese" for a LHS sequence which is optimized by an enhanced
-          stochastic evolutionary (ESE) algorithm
-
     acq_func : string, default=`"gp_hedge"`
         Function to minimize over the gaussian prior. Can be either
@@ -252,6 +238,9 @@ def gp_minimize(func, dimensions, base_estimator=None,
         Keeps list of models only as long as the argument given. In the
         case of None, the list has no capped length.
+    init_point_gen_kwargs : dict
+        Additional arguments to be passed to the initial_point_generator
+
     Returns
     -------
     res : `OptimizeResult`, scipy object
@@ -299,4 +288,5 @@ def gp_minimize(func, dimensions, base_estimator=None,
         initial_point_generator=initial_point_generator,
         n_restarts_optimizer=n_restarts_optimizer,
         x0=x0, y0=y0, random_state=rng, verbose=verbose,
-        callback=callback, n_jobs=n_jobs, model_queue_size=model_queue_size)
+        callback=callback, n_jobs=n_jobs, model_queue_size=model_queue_size,
+        init_point_gen_kwargs=init_point_gen_kwargs)
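With every `*_minimize` entry point now forwarding `init_point_gen_kwargs`, generator options travel from the public API down to the sampler that draws the initial points. A minimal sketch of the intended call style (assuming this patch is applied; the dict keys must match the chosen generator's constructor arguments):

    from skopt import gp_minimize

    res = gp_minimize(
        lambda x: (x[0] - 0.3) ** 2,   # toy 1-D objective
        [(-1.0, 1.0)],
        n_calls=15,
        initial_point_generator="lhs",
        # Forwarded verbatim to Lhs(**init_point_gen_kwargs):
        init_point_gen_kwargs={"criterion": "maximin", "iterations": 100},
        random_state=0,
    )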
diff --git a/skopt/optimizer/optimizer.py b/skopt/optimizer/optimizer.py
index d9cf7ed21..a8f5f0193 100644
--- a/skopt/optimizer/optimizer.py
+++ b/skopt/optimizer/optimizer.py
@@ -81,22 +81,7 @@ class Optimizer(object):

         - "hammersly" for a Hammersly sequence,

-        - "lhs" for a latin hypercube sequence,
-
-        - "lhs_center" for a centered LHS sequence,
-
-        - "lhs_maximin" for a LHS sequence which is maximized regarding
-          the minimum distance of all points to each other
-
-        - "lhs_ratio" for a LHS sequence which is maximized regarding
-          the ratio between the maximum to the minimum distance of all
-          points to each other
-
-        - "lhs_correlation" for a LHS sequence which is minimized
-          regarding the correlation coefficients
-
-        - "lhs_ese" for a LHS sequence which is optimized by an enhanced
-          stochastic evolutionary (ESE) algorithm
+        - "lhs" for a latin hypercube sequence

     acq_func : string, default=`"gp_hedge"`
         Function to minimize over the posterior distribution. Can be either
@@ -149,6 +134,9 @@ class Optimizer(object):
     acq_optimizer_kwargs : dict
         Additional arguments to be passed to the acquistion optimizer.

+    init_point_gen_kwargs : dict
+        Additional arguments to be passed to the initial_point_generator
+
     model_queue_size : int or None, default=None
         Keeps list of models only as long as the argument given. In the
         case of None, the list has no capped length.
@@ -176,7 +164,8 @@ def __init__(self, dimensions, base_estimator="gp",
                  random_state=None,
                  model_queue_size=None,
                  acq_func_kwargs=None,
-                 acq_optimizer_kwargs=None):
+                 acq_optimizer_kwargs=None,
+                 init_point_gen_kwargs=None):

         self.rng = check_random_state(random_state)
@@ -277,48 +266,31 @@ def __init__(self, dimensions, base_estimator="gp",
         self._initial_samples = None
         self._initial_point_generator = initial_point_generator
+        if init_point_gen_kwargs is None:
+            init_point_gen_kwargs = dict()
+        self.init_point_gen_kwargs = init_point_gen_kwargs

         if initial_point_generator != "random" and \
                 isinstance(initial_point_generator, str):
             if initial_point_generator == "sobol":
                 from skopt.samples import Sobol
-                self._initial_point_generator = Sobol()
+                self._initial_point_generator = Sobol(**self.init_point_gen_kwargs)
             elif initial_point_generator == "halton":
                 from skopt.samples import Halton
-                self._initial_point_generator = Halton()
+                self._initial_point_generator = Halton(**self.init_point_gen_kwargs)
             elif initial_point_generator == "hammersly":
                 from skopt.samples import Hammersly
-                self._initial_point_generator = Hammersly()
-            elif initial_point_generator in ["lhs", "lhs_classic"]:
-                from skopt.samples import Lhs
-                self._initial_point_generator = Lhs(lhs_type="classic")
-            elif initial_point_generator == "lhs_centered":
-                from skopt.samples import Lhs
-                self._initial_point_generator = Lhs(lhs_type="centered")
-            elif initial_point_generator == "lhs_maximin":
-                from skopt.samples import Lhs
-                self._initial_point_generator = Lhs(criterion="maximin")
-            elif initial_point_generator == "lhs_ratio":
-                from skopt.samples import Lhs
-                self._initial_point_generator = Lhs(criterion="ratio")
-            elif initial_point_generator == "lhs_correlation":
-                from skopt.samples import Lhs
-                self._initial_point_generator = Lhs(criterion="correlation")
-            elif initial_point_generator == "lhs_ese":
+                self._initial_point_generator = Hammersly(**self.init_point_gen_kwargs)
+            elif initial_point_generator == "lhs":
                 from skopt.samples import Lhs
-                self._initial_point_generator = Lhs(criterion="ese",
-                                                    iterations=10)
+                self._initial_point_generator = Lhs(**self.init_point_gen_kwargs)
             else:
                 raise ValueError(
                     "Unkown initial_point_generator: " +
                     str(initial_point_generator)
                 )
-            try:
-                inv_initial_samples = self._initial_point_generator.generate(
-                    self.space.n_dims, n_initial_points,
-                    random_state=random_state)
-            except:
-                raise Exception("initial_point_generator is not a valid"
-                                "generator function")
+            inv_initial_samples = self._initial_point_generator.generate(
+                self.space.n_dims, n_initial_points,
+                random_state=random_state)
             transformer = self.space.get_transformer()
             self.space.set_transformer("normalize")
             self._initial_samples = self.space.inverse_transform(
@@ -367,6 +339,7 @@ def copy(self, random_state=None):
             acq_optimizer=self.acq_optimizer,
             acq_func_kwargs=self.acq_func_kwargs,
             acq_optimizer_kwargs=self.acq_optimizer_kwargs,
+            init_point_gen_kwargs=self.init_point_gen_kwargs,
            random_state=random_state,
        )
        optimizer._initial_samples = self._initial_samples
diff --git a/skopt/samples/halton.py b/skopt/samples/halton.py
index 741e720a8..8d43f4262 100644
--- a/skopt/samples/halton.py
+++ b/skopt/samples/halton.py
@@ -4,6 +4,7 @@
 """
 import numpy as np
 from .base import InitialPointGenerator
+from sklearn.utils import check_random_state


 class Halton(InitialPointGenerator):
@@ -20,16 +21,20 @@ class Halton(InitialPointGenerator):

     Parameters
     ----------
-    skip : int
-        Skip the first ``skip`` samples. If negative, the maximum of
-        ``primes`` is used.
-    primes : tuple
+    min_skip : int
+        minimum skipped seed number. When `min_skip != max_skip`
+        a random number is picked.
+    max_skip : int
+        maximum skipped seed number. When `min_skip != max_skip`
+        a random number is picked.
+    primes : tuple, default=None
         The (non-)prime base to calculate values along each axis. If
-        empty, growing prime values starting from 2 will be used.
+        empty or None, growing prime values starting from 2 will be used.
     """
-    def __init__(self, skip=-1, primes=()):
-        self.skip = skip
+    def __init__(self, min_skip=-1, max_skip=-1, primes=None):
         self.primes = primes
+        self.min_skip = min_skip
+        self.max_skip = max_skip

     def generate(self, n_dim, n_samples, random_state=None):
         """Creates samples from Halton set.
@@ -48,19 +53,27 @@ def generate(self, n_dim, n_samples, random_state=None):
         np.array, shape=(n_dim, n_samples)
             Halton set
         """
-        primes = list(self.primes)
-        if not primes:
+        rng = check_random_state(random_state)
+        if self.primes is None:
+            primes = []
+        else:
+            primes = list(self.primes)
+        if len(primes) < n_dim:
             prime_order = 10 * n_dim
             while len(primes) < n_dim:
                 primes = _create_primes(prime_order)
                 prime_order *= 2
-        primes = primes[:n_dim]
-        assert len(primes) == n_dim, "not enough primes"
-        if self.skip < 0:
+            primes = primes[:n_dim]
+            assert len(primes) == n_dim, "not enough primes"
+        if self.min_skip < 0 and self.max_skip < 0:
             skip = max(primes)
+        elif self.min_skip == self.max_skip:
+            skip = self.min_skip
+        elif self.min_skip < 0 or self.max_skip < 0:
+            skip = max(self.min_skip, self.max_skip)
         else:
-            skip = self.skip
+            skip = rng.randint(self.min_skip, self.max_skip)

         out = np.empty((n_dim, n_samples))
         indices = [idx + skip for idx in range(n_samples)]
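The new skip selection above picks max(primes) when both bounds are negative, the shared value when min_skip == max_skip, the non-negative bound when only one is negative, and otherwise a random draw from [min_skip, max_skip) — note that rng.randint excludes the upper bound. A small sketch of that decision table in isolation (choose_skip is a hypothetical standalone helper mirroring the patched logic, not part of the patch):

    from sklearn.utils import check_random_state

    def choose_skip(min_skip, max_skip, primes, random_state=None):
        rng = check_random_state(random_state)
        if min_skip < 0 and max_skip < 0:
            return max(primes)
        if min_skip == max_skip:
            return min_skip
        if min_skip < 0 or max_skip < 0:
            return max(min_skip, max_skip)
        return rng.randint(min_skip, max_skip)

    assert choose_skip(-1, -1, [2, 3, 5]) == 5   # default: largest base
    assert choose_skip(7, 7, [2, 3, 5]) == 7     # fixed skip
    assert choose_skip(-1, 4, [2, 3, 5]) == 4    # one bound unset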
diff --git a/skopt/samples/hammersly.py b/skopt/samples/hammersly.py
index 6fc5efa45..efb320bb7 100644
--- a/skopt/samples/hammersly.py
+++ b/skopt/samples/hammersly.py
@@ -11,7 +11,7 @@ class Hammersly(InitialPointGenerator):
     """Creates `Hammersley` sequence samples.
     The Hammersley set is equivalent to the Halton sequence, except for one
     dimension is replaced with a regular grid. It is not recommended to
-    generate a Hammersley sequence of more than 10 dimensions.
+    generate a Hammersley sequence with more than 10 dimensions.

     For ``dim == 1`` the sequence falls back to Van Der Corput sequence.

@@ -22,16 +22,20 @@ class Hammersly(InitialPointGenerator):

     Parameters
     ----------
-    skip : int
-        Skip the first ``skip`` samples. If negative, the maximum of
-        ``primes`` is used.
-    primes : tuple
+    min_skip : int, default=-1
+        minimum skipped seed number. When `min_skip != max_skip` and
+        both are > -1, a random number is picked.
+    max_skip : int, default=-1
+        maximum skipped seed number. When `min_skip != max_skip` and
+        both are > -1, a random number is picked.
+    primes : tuple, default=None
         The (non-)prime base to calculate values along each axis. If
         empty, growing prime values starting from 2 will be used.
     """
-    def __init__(self, skip=-1, primes=()):
-        self.skip = skip
+    def __init__(self, min_skip=-1, max_skip=-1, primes=None):
         self.primes = primes
+        self.min_skip = min_skip
+        self.max_skip = max_skip

     def generate(self, n_dim, n_samples, random_state=None):
         """Creates samples from Hammersly set.
@@ -52,7 +56,9 @@ def generate(self, n_dim, n_samples, random_state=None):
         np.array, shape=(n_dim, n_samples)
             Hammersley set
         """
-        halton = Halton(skip=self.skip, primes=self.primes)
+
+        halton = Halton(min_skip=self.min_skip, max_skip=self.max_skip,
+                        primes=self.primes)

         if n_dim == 1:
             return halton.generate(n_dim, n_samples,
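Since Hammersly delegates to Halton for all but one dimension, the two generators now share the same min_skip/max_skip semantics. A usage sketch (assuming this patch is applied):

    from skopt.samples import Hammersly

    # With min_skip != max_skip the actual skip offset is drawn at random,
    # so a random_state makes the sequence reproducible.
    hammersly = Hammersly(min_skip=1, max_skip=100)
    samples = hammersly.generate(n_dim=2, n_samples=8, random_state=42)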
Got ".format(self.lhs_type)) return random_permute_matrix(h, random_state=random_state) else: internal_lhs = Lhs(lhs_type=self.lhs_type, criterion=None) @@ -110,7 +83,7 @@ def generate(self, n_dim, n_samples, random_state=None): mincorr = np.max(np.abs(r - np.eye(r.shape[0]))) h_opt = h.copy() - elif self.criterion == "maxmin": + elif self.criterion == "maximin": maxdist = 0 # Maximize the minimum distance between points for i in range(self.iterations): @@ -120,7 +93,6 @@ def generate(self, n_dim, n_samples, random_state=None): if maxdist < np.min(d): maxdist = np.min(d) h_opt = h.copy() - elif self.criterion == "ratio": minratio = np.inf @@ -133,220 +105,7 @@ def generate(self, n_dim, n_samples, random_state=None): if minratio > ratio: minratio = ratio h_opt = h.copy() - elif self.criterion == "ese": - - dm_init = internal_lhs.generate(n_dim, n_samples, - random_state=random_state) - - if self.ese_threshold_init <= 0.0: - threshold = 0.005 * _w2_discrepancy_fast(dm_init) - else: - threshold = self.ese_threshold_init - if self.ese_num_exchanges <= 0: # number of exchanges - num_exchanges = _calc_num_candidate(n_samples) - else: - num_exchanges = self.ese_num_exchanges - # maximum number of inner iterations - if self.ese_max_inner <= 0: - max_inner = _calc_max_inner(n_samples, n_dim) - else: - max_inner = self.ese_max_inner - - dm = dm_init.copy() # the current design - # the best value of obj.func. so far - obj_func_best = _w2_discrepancy_fast(dm) - # the old value of obj.func. - obj_func_best_old = _w2_discrepancy_fast(dm) - flag_explore = False # improved flag - - best_evol = [] # Keep track the best solution - try_evol = [] # Keep track the accepted trial solution - - # Begin Outer Iteration - for outer in range(self.iterations): - # Initialization of Inner Iteration - n_accepted = 0 # number of accepted trial - n_improved = 0 # number of improved trial - - # Begin Inner Iteration - for inner in range(max_inner): - obj_func = _w2_discrepancy_fast(dm) - # Perturb current design - num_dimension = inner % n_dim - import itertools - - # Create pairs of all possible combination - pairs = list(itertools.combinations( - [_ for _ in range(n_samples)], 2)) - # Create random choices for the pair of - # perturbation, w/o replacement - rand_choices = rng.choice(len(pairs), num_exchanges, - replace=False) - # Initialize the search - obj_func_current = np.inf - dm_current = dm.copy() - for i in rand_choices: - # Always perturb from the design passed - # in argument - dm_try = dm.copy() - # Do column-wise operation in a given - # column 'num_dimension' - dm_try[pairs[i][0], num_dimension] = dm[ - pairs[i][1], num_dimension] - dm_try[pairs[i][1], num_dimension] = dm[ - pairs[i][0], num_dimension] - obj_func_try = _w2_discrepancy_fast(dm_try) - if obj_func_try < obj_func_current: - # Select the best trial from all the - # perturbation trials - obj_func_current = obj_func_try - dm_current = dm_try.copy() - - obj_func_try = _w2_discrepancy_fast(dm_current) - # Check whether solution is acceptable - if (obj_func_try - obj_func) <=\ - threshold * rng.rand(): - # Accept solution - dm = dm_current.copy() - n_accepted += 1 - try_evol.append(obj_func_try) - if obj_func_try < obj_func_best: - # Best solution found - h_opt = dm.copy() - obj_func_best = obj_func_try - best_evol.append(obj_func_best) - n_improved += 1 - - # Accept/Reject as Best Solution for convergence checking - if ((obj_func_best_old - obj_func_best) / - obj_func_best) > 1e-6: - # Improvement found - obj_func_best_old = obj_func_best - # Reset 
-                        flag_explore = False
-                        flag_imp = True
-                    else:
-                        # Improvement not found
-                        flag_imp = False
-
-                    # Improve vs. Explore Phase and Threshold Update
-                    if flag_imp:  # Improve
-                        # New best solution found, carry out
-                        # improvement process
-                        if (float(n_accepted / num_exchanges) >
-                                self.ese_improving_params[0]) & \
-                                (n_accepted > n_improved):
-                            # Lots acceptance but not all of them
-                            # is improvement,
-                            # reduce threshold, make it harder to
-                            # accept a trial
-                            threshold *= self.ese_improving_params[1]
-                        else:
-                            # Few acceptance or all trials are improvement,
-                            # increase threshold
-                            # make it easier to accept a trial
-                            threshold /= self.ese_improving_params[1]
-                    # Explore, No new best solution found
-                    # during last iteration
-                    else:
-                        # Exploring process, warming up vs. cooling down
-                        if n_accepted < self.ese_exploring_params[0] *\
-                                num_exchanges:
-                            # Reach below limit, increase threshold
-                            # ("warming up")
-                            flag_explore = True
-                        elif n_accepted > self.ese_exploring_params[1] *\
-                                num_exchanges:
-                            # Reach above limit, decrease threshold
-                            # ("cooling down")
-                            flag_explore = False
-
-                        if flag_explore:
-                            # Ramp up exploration and below upper limit,
-                            # increase threshold
-                            threshold /= self.ese_exploring_params[3]
-                        elif not flag_explore:
-                            # Slow down exploration and above lower limit,
-                            # decrease threshold
-                            threshold *= self.ese_exploring_params[2]
-
+            else:
+                raise ValueError("Wrong criterion. "
+                                 "Got {}".format(self.criterion))
             return h_opt
-
-
-def _calc_num_candidate(n):
-    """Calculate the number of candidates from perturbing the current design
-    Recommended in the article is the maximum number of pair combinations
-    from a given column divided by a factor of 5.
-    It is also recommended that the number of candidates to be evaluated does
-    not exceed 50
-
-    Parameters
-    ----------
-    n : int
-        the number of elements to be permuted
-    Returns
-    -------
-    the number of candidates from perturbing the current design
-    column-wise
-    """
-    pairs = math.factorial(n) / math.factorial(n - 2) / math.factorial(2)
-    fac = 5  # The factor recommended in the article
-
-    return min(int(pairs / fac), 50)
-
-
-def _calc_max_inner(n, k):
-    """Calculate the maximum number of inner iterations
-    :math:`\frac{2 \times n_e \times k}{J}`
-    It is recommended that the number of inner iterations does not exceed 100
-    Parameters
-    ----------
-    n : int
-        the number of samples in the design
-    k : int
-        the number of design dimensions
-    Returns
-    -------
-    the maximum number of inner iterations/loop
-    """
-    pairs = math.factorial(n) / math.factorial(n - 2) / math.factorial(2)
-
-    return min(int(2 * pairs * k / _calc_num_candidate(n)), 100)
-
-
-def _w2_discrepancy_fast(D):
-    """The vectorized version of wrap-around L2-discrepancy
-    calculation, faster!
-    The formula for the Wrap-Around L2-Discrepancy is taken from Eq.5 of (1)
-    :math:`WD^2(D) = -(4/3)^K + 1/N^2 \Sigma_{i,j=1}^{N} \
-        \Pi_{k=1}^K [3/2 - |x_k^1 - x_k^2| * (1 - |x_k^1 - x_k^2|)]`
-    The implementation below uses a vector operation of numpy array to
-    avoid the
-    nested loop in the more straightforward implementation
-
-    Parameters
-    ----------
-    D : np.array
-        the design matrix
-
-    Returns
-    -------
-    the wrap-around L2-discrepancy
-    """
-
-    n = D.shape[0]  # the number of samples
-    k = D.shape[1]  # the number of dimension
-    delta = [None] * k
-    for i in range(k):
-        # loop over dimension to calculate the absolute difference
-        # between point
-        # in a given dimension, note the vectorized operation
-        delta[i] = np.abs(D[:, i] - np.reshape(D[:, i], (len(D[:, i]), 1)))
-
-    product = 1.5 - delta[0] * (1 - delta[0])
-    for i in range(1, k):
-        product *= (1.5 - delta[i] * (1 - delta[i]))
-
-    w2_disc = -1 * (4.0/3.0)**k + 1/n**2 * np.sum(product)
-
-    return w2_disc
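After this change the supported criteria are None, 'correlation', 'maximin' and 'ratio', with 'maximin' as the new default. A usage sketch of the surviving options (assuming this patch is applied):

    from skopt.samples import Lhs

    # Default: classic LHS, optimized by maximizing the minimal
    # pairwise distance over 1000 candidate designs.
    lhs = Lhs()
    samples = lhs.generate(n_dim=2, n_samples=20, random_state=0)

    # Unoptimized, centered variant.
    lhs_centered = Lhs(lhs_type="centered", criterion=None)

    # Minimize pairwise correlation instead, with fewer candidates.
    lhs_corr = Lhs(criterion="correlation", iterations=100)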
diff --git a/skopt/tests/test_samples.py b/skopt/tests/test_samples.py
index 5cbb15f93..2256e2ece 100644
--- a/skopt/tests/test_samples.py
+++ b/skopt/tests/test_samples.py
@@ -20,7 +20,6 @@
 from skopt.samples.sobol import _bit_lo0, _bit_hi1
 from skopt.samples.halton import _van_der_corput_samples, _create_primes
 from skopt.samples import Hammersly, Halton, Lhs, Sobol
-from skopt.samples.lhs import Lhs, _w2_discrepancy_fast


 @pytest.mark.fast_test
@@ -40,10 +39,6 @@ def test_lhs_criterion():
     samples = lhs.generate(2, 200)
     assert len(samples) == 200
     assert len(samples[0]) == 2
-    lhs = Lhs(criterion="ese", iterations=1)
-    samples = lhs.generate(2, 20)
-    assert len(samples) == 20
-    assert len(samples[0]) == 2


 @pytest.mark.fast_test

From 7b60185576db9f5e594925f7a962f10a08391e7e Mon Sep 17 00:00:00 2001
From: holgern
Date: Sat, 15 Feb 2020 22:01:47 +0100
Subject: [PATCH 068/265] Rename IntegerEncoder to LabelEncoder

---
 doc/modules/classes.rst          |  2 +-
 skopt/optimizer/optimizer.py     | 14 +++++++++-----
 skopt/space/space.py             |  4 ++--
 skopt/space/transformers.py      | 20 +++++++++++++-------
 skopt/tests/test_samples.py      |  1 +
 skopt/tests/test_transformers.py | 14 +++++++-------
 6 files changed, 33 insertions(+), 22 deletions(-)

diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst
index 13e96ce33..db8405f74 100644
--- a/doc/modules/classes.rst
+++ b/doc/modules/classes.rst
@@ -296,7 +296,7 @@ details.
    space.transformers.Normalize
    space.transformers.Pipeline
    space.transformers.Transformer
-   space.transformers.IntegerEncoder
+   space.transformers.LabelEncoder
    space.transformers.StringEncoder


diff --git a/skopt/optimizer/optimizer.py b/skopt/optimizer/optimizer.py
index a8f5f0193..73296e47c 100644
--- a/skopt/optimizer/optimizer.py
+++ b/skopt/optimizer/optimizer.py
@@ -273,16 +273,20 @@ def __init__(self, dimensions, base_estimator="gp",
                 isinstance(initial_point_generator, str):
             if initial_point_generator == "sobol":
                 from skopt.samples import Sobol
-                self._initial_point_generator = Sobol(**self.init_point_gen_kwargs)
+                self._initial_point_generator = Sobol(
+                    **self.init_point_gen_kwargs)
             elif initial_point_generator == "halton":
                 from skopt.samples import Halton
-                self._initial_point_generator = Halton(**self.init_point_gen_kwargs)
+                self._initial_point_generator = Halton(
+                    **self.init_point_gen_kwargs)
             elif initial_point_generator == "hammersly":
                 from skopt.samples import Hammersly
-                self._initial_point_generator = Hammersly(**self.init_point_gen_kwargs)
+                self._initial_point_generator = Hammersly(
+                    **self.init_point_gen_kwargs)
             elif initial_point_generator == "lhs":
                 from skopt.samples import Lhs
-                self._initial_point_generator = Lhs(**self.init_point_gen_kwargs)
+                self._initial_point_generator = Lhs(
+                    **self.init_point_gen_kwargs)
             else:
                 raise ValueError(
                     "Unkown initial_point_generator: " +
@@ -290,7 +294,7 @@ def __init__(self, dimensions, base_estimator="gp",
                 )
             inv_initial_samples = self._initial_point_generator.generate(
                 self.space.n_dims, n_initial_points,
-                random_state=random_state)
+                random_state=self.rng.randint(0, np.iinfo(np.int32).max))
             transformer = self.space.get_transformer()
             self.space.set_transformer("normalize")
             self._initial_samples = self.space.inverse_transform(
diff --git a/skopt/space/space.py b/skopt/space/space.py
index 845e862cc..9bd4ecd3d 100644
--- a/skopt/space/space.py
+++ b/skopt/space/space.py
@@ -11,7 +11,7 @@

 from .transformers import CategoricalEncoder
 from .transformers import StringEncoder
-from .transformers import IntegerEncoder
+from .transformers import LabelEncoder
 from .transformers import Normalize
 from .transformers import Identity
 from .transformers import LogN
@@ -595,7 +595,7 @@ def set_transformer(self, transform="onehot"):
             self.transformer.fit(self.categories)
         elif transform == "normalize":
             self.transformer = Pipeline(
-                [IntegerEncoder(list(self.categories)),
+                [LabelEncoder(list(self.categories)),
                  Normalize(0, len(self.categories) - 1)])
         else:
             self.transformer = Identity()
diff --git a/skopt/space/transformers.py b/skopt/space/transformers.py
index 32bd95567..e262b512d 100644
--- a/skopt/space/transformers.py
+++ b/skopt/space/transformers.py
@@ -1,6 +1,7 @@
 from __future__ import division
 import numpy as np
 from sklearn.preprocessing import LabelBinarizer
+from sklearn.utils import column_or_1d


 class Transformer(object):
@@ -151,8 +152,8 @@ def inverse_transform(self, Xt):
         ]


-class IntegerEncoder(Transformer):
-    """IntegerEncoder that can handle categorical variables."""
+class LabelEncoder(Transformer):
+    """LabelEncoder that can handle categorical variables."""
     def __init__(self, X=None):
         if X is not None:
             self.fit(X)

     def fit(self, X):

         X : array-like, shape=(n_categories,)
             List of categories.
""" - self.mapping_ = {v: i for i, v in enumerate(X)} + X = np.asarray(X) + if X.dtype == object: + self.mapping_ = {v: i for i, v in enumerate(X)} + else: + i = 0 + self.mapping_ = {} + for v in np.unique(X): + self.mapping_[v] = i + i += 1 self.inverse_mapping_ = {i: v for v, i in self.mapping_.items()} return self @@ -200,10 +209,7 @@ def inverse_transform(self, Xt): X : array-like, shape=(n_samples,) The original categories. """ - if isinstance(Xt, (float, np.float64)): - Xt = [Xt] - else: - Xt = np.asarray(Xt) + Xt = np.asarray(Xt) return [ self.inverse_mapping_[int(np.round(i))] for i in Xt ] diff --git a/skopt/tests/test_samples.py b/skopt/tests/test_samples.py index 2256e2ece..b0f8c2a32 100644 --- a/skopt/tests/test_samples.py +++ b/skopt/tests/test_samples.py @@ -53,6 +53,7 @@ def test_bit(): for i in range(len(X)): assert _bit_hi1(X[i]) == res[i] + @pytest.mark.fast_test def test_sobol(): sobol = Sobol() diff --git a/skopt/tests/test_transformers.py b/skopt/tests/test_transformers.py index 130338fe1..d1d62e929 100644 --- a/skopt/tests/test_transformers.py +++ b/skopt/tests/test_transformers.py @@ -6,7 +6,7 @@ from numpy.testing import assert_equal from numpy.testing import assert_raises_regex from skopt.space import LogN, Normalize -from skopt.space.transformers import StringEncoder, IntegerEncoder, Identity +from skopt.space.transformers import StringEncoder, LabelEncoder, Identity @pytest.mark.fast_test @@ -30,13 +30,13 @@ def test_logn10_integer(): @pytest.mark.fast_test def test_integer_encoder(): - transformer = IntegerEncoder() + transformer = LabelEncoder() X = [1, 5, 9] transformer.fit(X) assert_array_equal(transformer.transform(X), [0, 1, 2]) assert_array_equal(transformer.inverse_transform([0, 1, 2]), X) - transformer = IntegerEncoder(X) + transformer = LabelEncoder(X) assert_array_equal(transformer.transform(X), [0, 1, 2]) assert_array_equal(transformer.inverse_transform([0, 1, 2]), X) @@ -45,7 +45,7 @@ def test_integer_encoder(): assert_array_equal(transformer.transform(X), [0, 1, 2]) assert_array_equal(transformer.inverse_transform([0, 1, 2]), X) - transformer = IntegerEncoder(X) + transformer = LabelEncoder(X) assert_array_equal(transformer.transform(X), [0, 1, 2]) assert_array_equal(transformer.inverse_transform([0, 1, 2]), X) @@ -74,17 +74,17 @@ def test_string_encoder(): def test_identity_encoder(): transformer = Identity() - X = [1, 5, 9] + X = [1, 5, 9, 9, 5, 1] transformer.fit(X) assert_array_equal(transformer.transform(X), X) assert_array_equal(transformer.inverse_transform(X), X) - X = ['a', True, 1] + X = ['a', True, 1, 'a', True, 1] transformer.fit(X) assert_array_equal(transformer.transform(X), X) assert_array_equal(transformer.inverse_transform(X), X) - X = ["a", "b", "c"] + X = ["a", "b", "c", "a", "b", "c"] transformer.fit(X) assert_array_equal(transformer.transform(X), X) assert_array_equal(transformer.inverse_transform(X), X) From 24ff1f36df109b79ff3e3eac02efa0687acd642f Mon Sep 17 00:00:00 2001 From: holgern Date: Sun, 16 Feb 2020 23:21:30 +0100 Subject: [PATCH 069/265] fix unit test --- skopt/space/transformers.py | 5 ++++- skopt/tests/test_space.py | 6 ++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/skopt/space/transformers.py b/skopt/space/transformers.py index e262b512d..b616ac584 100644 --- a/skopt/space/transformers.py +++ b/skopt/space/transformers.py @@ -209,7 +209,10 @@ def inverse_transform(self, Xt): X : array-like, shape=(n_samples,) The original categories. 
""" - Xt = np.asarray(Xt) + if isinstance(Xt, (float, np.float64)): + Xt = [Xt] + else: + Xt = np.asarray(Xt) return [ self.inverse_mapping_[int(np.round(i))] for i in Xt ] diff --git a/skopt/tests/test_space.py b/skopt/tests/test_space.py index 9a6121028..de058436a 100644 --- a/skopt/tests/test_space.py +++ b/skopt/tests/test_space.py @@ -523,6 +523,12 @@ def test_normalize_categorical(): random_state=1)))) assert_array_equal(categories, a.inverse_transform([0., 0.5, 1.])) + categories = [1., 2., 3.] + a = Categorical(categories, transform="normalize") + assert_array_equal(categories, np.sort(np.unique(a.rvs(100, + random_state=1)))) + assert_array_equal(categories, a.inverse_transform([0., 0.5, 1.])) + categories = [1, 2, 3] a = Categorical(categories, transform="string") a.set_transformer("normalize") From 1a790181ad7a205c7b5dd027db85d06ccb607a44 Mon Sep 17 00:00:00 2001 From: holgern Date: Mon, 17 Feb 2020 11:19:16 +0100 Subject: [PATCH 070/265] Add sampling comparison example --- .../initial-sampling-method-integer.py | 31 ++- examples/samples/initial-sampling-method.py | 20 +- examples/samples/sampling_comparison.py | 195 ++++++++++++++++++ 3 files changed, 225 insertions(+), 21 deletions(-) create mode 100644 examples/samples/sampling_comparison.py diff --git a/examples/samples/initial-sampling-method-integer.py b/examples/samples/initial-sampling-method-integer.py index e1d5946af..6dfbd746d 100644 --- a/examples/samples/initial-sampling-method-integer.py +++ b/examples/samples/initial-sampling-method-integer.py @@ -35,7 +35,7 @@ ############################################################################# -def plot_branin(x, title): +def plot_searchspace(x, title): fig, ax = plt.subplots() plt.plot(np.array(x)[:, 0], np.array(x)[:, 1], 'bo', label='samples') plt.plot(np.array(x)[:, 0], np.array(x)[:, 1], 'bs', markersize=40, alpha=0.5) @@ -48,7 +48,7 @@ def plot_branin(x, title): ax.grid(True) n_dim = 2 -n_samples = 5 +n_samples = 36 space = Space([(0, 5), (0, 5)]) space.set_transformer("normalize") @@ -57,9 +57,10 @@ def plot_branin(x, title): # Random sampling # --------------- x = space.rvs(n_samples) -plot_branin(x, "Random samples") +plot_searchspace(x, "Random samples") pdist_data = [] x_label = [] +print("empty fields: %d" % (36 - np.size(np.unique(x, axis=0), 0))) pdist_data.append(pdist(x).flatten()) x_label.append("random") ############################################################################# @@ -69,7 +70,8 @@ def plot_branin(x, title): sobol = Sobol() inv_initial_samples = sobol.generate(n_dim, n_samples) x = space.inverse_transform(inv_initial_samples) -plot_branin(x, 'Sobol') +plot_searchspace(x, 'Sobol') +print("empty fields: %d" % (36 - np.size(np.unique(x, axis=0), 0))) pdist_data.append(pdist(x).flatten()) x_label.append("sobol") @@ -81,7 +83,8 @@ def plot_branin(x, title): lhs = Lhs(lhs_type="classic") inv_initial_samples = lhs.generate(n_dim, n_samples) x = space.inverse_transform(inv_initial_samples) -plot_branin(x, 'classic LHS') +plot_searchspace(x, 'classic LHS') +print("empty fields: %d" % (36 - np.size(np.unique(x, axis=0), 0))) pdist_data.append(pdist(x).flatten()) x_label.append("lhs") @@ -92,7 +95,8 @@ def plot_branin(x, title): lhs = Lhs(lhs_type="centered") inv_initial_samples = lhs.generate(n_dim, n_samples) x = space.inverse_transform(inv_initial_samples) -plot_branin(x, 'centered LHS') +plot_searchspace(x, 'centered LHS') +print("empty fields: %d" % (36 - np.size(np.unique(x, axis=0), 0))) pdist_data.append(pdist(x).flatten()) 
x_label.append("center") @@ -103,7 +107,8 @@ def plot_branin(x, title): lhs = Lhs(criterion="maximin", iterations=1000) inv_initial_samples = lhs.generate(n_dim, n_samples) x = space.inverse_transform(inv_initial_samples) -plot_branin(x, 'maximin LHS') +plot_searchspace(x, 'maximin LHS') +print("empty fields: %d" % (36 - np.size(np.unique(x, axis=0), 0))) pdist_data.append(pdist(x).flatten()) x_label.append("maximin") @@ -114,7 +119,8 @@ def plot_branin(x, title): lhs = Lhs(criterion="correlation", iterations=1000) inv_initial_samples = lhs.generate(n_dim, n_samples) x = space.inverse_transform(inv_initial_samples) -plot_branin(x, 'correlation LHS') +plot_searchspace(x, 'correlation LHS') +print("empty fields: %d" % (36 - np.size(np.unique(x, axis=0), 0))) pdist_data.append(pdist(x).flatten()) x_label.append("corr") @@ -125,7 +131,8 @@ def plot_branin(x, title): lhs = Lhs(criterion="ratio", iterations=1000) inv_initial_samples = lhs.generate(n_dim, n_samples) x = space.inverse_transform(inv_initial_samples) -plot_branin(x, 'ratio LHS') +plot_searchspace(x, 'ratio LHS') +print("empty fields: %d" % (36 - np.size(np.unique(x, axis=0), 0))) pdist_data.append(pdist(x).flatten()) x_label.append("ratio") @@ -136,7 +143,8 @@ def plot_branin(x, title): halton = Halton() inv_initial_samples = halton.generate(n_dim, n_samples) x = space.inverse_transform(inv_initial_samples) -plot_branin(x, 'Halton') +plot_searchspace(x, 'Halton') +print("empty fields: %d" % (36 - np.size(np.unique(x, axis=0), 0))) pdist_data.append(pdist(x).flatten()) x_label.append("halton") @@ -147,7 +155,8 @@ def plot_branin(x, title): hammersly = Hammersly() inv_initial_samples = hammersly.generate(n_dim, n_samples) x = space.inverse_transform(inv_initial_samples) -plot_branin(x, 'Hammersly') +plot_searchspace(x, 'Hammersly') +print("empty fields: %d" % (36 - np.size(np.unique(x, axis=0), 0))) pdist_data.append(pdist(x).flatten()) x_label.append("hammersly") diff --git a/examples/samples/initial-sampling-method.py b/examples/samples/initial-sampling-method.py index b7fa883a8..822a80e3a 100644 --- a/examples/samples/initial-sampling-method.py +++ b/examples/samples/initial-sampling-method.py @@ -36,7 +36,7 @@ ############################################################################# -def plot_branin(x, title): +def ploit_searchspace(x, title): fig, ax = plt.subplots() plt.plot(np.array(x)[:, 0], np.array(x)[:, 1], 'bo', label='samples') plt.plot(np.array(x)[:, 0], np.array(x)[:, 1], 'bo', markersize=40, alpha=0.5) @@ -57,7 +57,7 @@ def plot_branin(x, title): # Random sampling # --------------- x = space.rvs(n_samples) -plot_branin(x, "Random samples") +ploit_searchspace(x, "Random samples") pdist_data = [] x_label = [] pdist_data.append(pdist(x).flatten()) @@ -69,7 +69,7 @@ def plot_branin(x, title): sobol = Sobol() inv_initial_samples = sobol.generate(n_dim, n_samples) x = space.inverse_transform(inv_initial_samples) -plot_branin(x, 'Sobol') +ploit_searchspace(x, 'Sobol') pdist_data.append(pdist(x).flatten()) x_label.append("sobol") @@ -81,7 +81,7 @@ def plot_branin(x, title): lhs = Lhs(lhs_type="classic") inv_initial_samples = lhs.generate(n_dim, n_samples) x = space.inverse_transform(inv_initial_samples) -plot_branin(x, 'classic LHS') +ploit_searchspace(x, 'classic LHS') pdist_data.append(pdist(x).flatten()) x_label.append("lhs") @@ -92,7 +92,7 @@ def plot_branin(x, title): lhs = Lhs(lhs_type="centered") inv_initial_samples = lhs.generate(n_dim, n_samples) x = space.inverse_transform(inv_initial_samples) -plot_branin(x, 
            'centered LHS')
+plot_searchspace(x, 'centered LHS')
 pdist_data.append(pdist(x).flatten())
 x_label.append("center")
@@ -103,7 +103,7 @@ def plot_branin(x, title):
 lhs = Lhs(criterion="maximin", iterations=1000)
 inv_initial_samples = lhs.generate(n_dim, n_samples)
 x = space.inverse_transform(inv_initial_samples)
-plot_branin(x, 'maximin LHS')
+plot_searchspace(x, 'maximin LHS')
 pdist_data.append(pdist(x).flatten())
 x_label.append("maximin")
@@ -114,7 +114,7 @@ def plot_branin(x, title):
 lhs = Lhs(criterion="correlation", iterations=1000)
 inv_initial_samples = lhs.generate(n_dim, n_samples)
 x = space.inverse_transform(inv_initial_samples)
-plot_branin(x, 'correlation LHS')
+plot_searchspace(x, 'correlation LHS')
 pdist_data.append(pdist(x).flatten())
 x_label.append("corr")
@@ -125,7 +125,7 @@ def plot_branin(x, title):
 lhs = Lhs(criterion="ratio", iterations=1000)
 inv_initial_samples = lhs.generate(n_dim, n_samples)
 x = space.inverse_transform(inv_initial_samples)
-plot_branin(x, 'ratio LHS')
+plot_searchspace(x, 'ratio LHS')
 pdist_data.append(pdist(x).flatten())
 x_label.append("ratio")
@@ -136,7 +136,7 @@ def plot_branin(x, title):
 halton = Halton()
 inv_initial_samples = halton.generate(n_dim, n_samples)
 x = space.inverse_transform(inv_initial_samples)
-plot_branin(x, 'Halton')
+plot_searchspace(x, 'Halton')
 pdist_data.append(pdist(x).flatten())
 x_label.append("halton")
@@ -147,7 +147,7 @@ def plot_branin(x, title):
 hammersly = Hammersly()
 inv_initial_samples = hammersly.generate(n_dim, n_samples)
 x = space.inverse_transform(inv_initial_samples)
-plot_branin(x, 'Hammersly')
+plot_searchspace(x, 'Hammersly')
 pdist_data.append(pdist(x).flatten())
 x_label.append("hammersly")
diff --git a/examples/samples/sampling_comparison.py b/examples/samples/sampling_comparison.py
new file mode 100644
index 000000000..9bc9d44aa
--- /dev/null
+++ b/examples/samples/sampling_comparison.py
@@ -0,0 +1,195 @@
+"""
+==========================================
+Comparing initial point generation methods
+==========================================
+
+Holger Nahrstaedt 2020
+
+.. currentmodule:: skopt
+
+Bayesian optimization or sequential model-based optimization uses a surrogate
+model to model the expensive to evaluate function `func`. There are several
+choices for what kind of surrogate model to use. This notebook compares the
+performance of:
+
+* Halton sequence,
+* Hammersly sequence,
+* Sobol sequence and
+* Latin hypercube sampling
+
+as initial points. The purely random point generation is used as
+a baseline.
+"""
+
+print(__doc__)
+import numpy as np
+np.random.seed(123)
+import matplotlib.pyplot as plt
+
+#############################################################################
+# Toy model
+# =========
+#
+# We will use the :class:`benchmarks.hart6` function as toy model for the
+# expensive function. In a real world application this function would be
+# unknown and expensive to evaluate.
+
+from skopt.benchmarks import hart6 as hart6_
+# redefined `hart6` to allow adding arbitrary "noise" dimensions
+def hart6(x, noise_level=0.):
+    return hart6_(x[:6]) + noise_level * np.random.randn()
+
+from skopt.benchmarks import branin as _branin
+
+def branin(x, noise_level=0.):
+    return _branin(x) + noise_level * np.random.randn()
+
+#############################################################################
+
+from scipy.optimize import OptimizeResult
+from matplotlib.pyplot import cm
+import time
+from skopt import gp_minimize, forest_minimize, dummy_minimize
+
+def plot_convergence(result_list, true_minimum=None, yscale=None,
+                     title="Convergence plot"):
+
+    ax = plt.gca()
+    ax.set_title(title)
+    ax.set_xlabel("Number of calls $n$")
+    ax.set_ylabel(r"$\min f(x)$ after $n$ calls")
+    ax.grid()
+    if yscale is not None:
+        ax.set_yscale(yscale)
+    colors = cm.hsv(np.linspace(0.25, 1.0, len(result_list)))
+
+    for results, color in zip(result_list, colors):
+        name, results = results
+        n_calls = len(results[0].x_iters)
+        iterations = range(1, n_calls + 1)
+        mins = [[np.min(r.func_vals[:i]) for i in iterations]
+                for r in results]
+        ax.plot(iterations, np.mean(mins, axis=0), c=color, label=name)
+        # ax.errorbar(iterations, np.mean(mins, axis=0),
+        #             yerr=np.std(mins, axis=0), c=color, label=name)
+    if true_minimum:
+        ax.axhline(true_minimum, linestyle="--",
+                   color="r", lw=1,
+                   label="True minimum")
+    ax.legend(loc="best")
+    return ax
+
+
+def run(minimizer, initial_point_generator, init_point_gen_kwargs,
+        n_random_starts=10, n_repeats=1):
+    return [minimizer(func, bounds, n_random_starts=n_random_starts,
+                      initial_point_generator=initial_point_generator,
+                      init_point_gen_kwargs=init_point_gen_kwargs,
+                      n_calls=n_calls, random_state=n)
+            for n in range(n_repeats)]
+
+
+def run_measure(initial_point_generator,
+                n_random_starts=10,
+                init_point_gen_kwargs=None):
+    start = time.time()
+    # n_repeats must set to a much higher value to obtain meaningful results.
+    n_repeats = 1
+    res = run(gp_minimize, initial_point_generator, init_point_gen_kwargs,
+              n_random_starts=n_random_starts, n_repeats=n_repeats)
+    duration = time.time() - start
+    # print("%s %s: %.2f s" % (initial_point_generator,
+    #                          str(init_point_gen_kwargs),
+    #                          duration))
+    return res
+#############################################################################
+# Objective
+# =========
+#
+# The objective of this example is to find one of these minima in as
+# few iterations as possible. One iteration is defined as one call
+# to the :class:`benchmarks.hart6` function.
+#
+# We will evaluate each model several times using a different seed for the
+# random number generator. Then compare the average performance of these
+# models. This makes the comparison more robust against models that get
+# "lucky".
+
+from functools import partial
+example = "hart6"
+
+if example == "hart6":
+    func = partial(hart6, noise_level=0.1)
+    bounds = [(0., 1.), ] * 6
+    true_minimum = -3.32237
+    n_calls = 40
+    n_random_starts = 10
+    yscale = None
+    title = "Convergence plot - hart6"
+else:
+    func = partial(branin, noise_level=2.0)
+    bounds = [(-5.0, 10.0), (0.0, 15.0)]
+    true_minimum = 0.397887
+    n_calls = 30
+    n_random_starts = 10
+    yscale = "log"
+    title = "Convergence plot - branin"
+
+#############################################################################
+
+
+# Random search
+dummy_res = run_measure("random", n_random_starts)
+lhs_res = run_measure("lhs", n_random_starts,
+                      {"lhs_type": "classic",
+                       "criterion": None})
+lhs2_res = run_measure("lhs", n_random_starts,
+                       {"criterion": "maximin"})
+sobol_res = run_measure("sobol", n_random_starts,
+                        {"randomize": False,
+                         "min_skip": 1, "max_skip": 100})
+halton_res = run_measure("halton", n_random_starts)
+hammersly_res = run_measure("hammersly", n_random_starts)
+
+#############################################################################
+# Note that this can take a few minutes.
+
+plot = plot_convergence([("random", dummy_res),
+                         ("lhs", lhs_res),
+                         ("lhs_maximin", lhs2_res),
+                         ("sobol", sobol_res),
+                         ("halton", halton_res),
+                         ("hammersly", hammersly_res)],
+                        true_minimum=true_minimum,
+                        yscale=yscale,
+                        title=title)
+
+plt.show()
+
+#############################################################################
+# This plot shows the value of the minimum found (y axis) as a function
+# of the number of iterations performed so far (x axis). The dashed red line
+# indicates the true value of the minimum of the :class:`benchmarks.hart6`
+# function.
+
+#############################################################################
+# Test with different n_random_starts values
+lhs2_15_res = run_measure("lhs", 12,
+                          {"criterion": "maximin"})
+lhs2_20_res = run_measure("lhs", 14,
+                          {"criterion": "maximin"})
+lhs2_25_res = run_measure("lhs", 16,
+                          {"criterion": "maximin"})
+
+#############################################################################
+# n_random_starts = 10 produces the best results
+
+plot = plot_convergence([("random - 10", dummy_res),
+                         ("lhs_maximin - 10", lhs2_res),
+                         ("lhs_maximin - 12", lhs2_15_res),
+                         ("lhs_maximin - 14", lhs2_20_res),
+                         ("lhs_maximin - 16", lhs2_25_res)],
+                        true_minimum=true_minimum,
+                        yscale=yscale,
+                        title=title)
+
+plt.show()
\ No newline at end of file

From 249c0796bfc9d79d2800b78e73ad58b3b34b5eba Mon Sep 17 00:00:00 2001
From: holgern
Date: Tue, 18 Feb 2020 17:15:36 +0100
Subject: [PATCH 071/265] Add pyproject.toml and add classifiers to setup.py

---
 MANIFEST.in    |  4 +++-
 pyproject.toml | 11 +++++++++++
 setup.py       | 20 +++++++++++++++++++-
 3 files changed, 33 insertions(+), 2 deletions(-)
 create mode 100644 pyproject.toml

diff --git a/MANIFEST.in b/MANIFEST.in
index 95de3d03e..77406979c 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -2,4 +2,6 @@
 include *.md
 include *.rst
 recursive-include doc *
 recursive-include examples *
-include README.rst
\ No newline at end of file
+include COPYING
+include README.rst
+include pyproject.toml
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 000000000..ccba6e074
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,11 @@
+[build-system]
+# Minimum requirements for the build system to execute.
+requires = [
+    "setuptools",
+    "wheel",
+    "numpy",
+    "scipy>=0.14",
+    "scikit-learn>=0.19.1",
+    "pyaml",
+    "joblib>=0.11",
+]
\ No newline at end of file
diff --git a/setup.py b/setup.py
index b66a93600..7023e7972 100644
--- a/setup.py
+++ b/setup.py
@@ -17,13 +17,30 @@
 VERSION = skopt.__version__

+CLASSIFIERS = ['Intended Audience :: Science/Research',
+               'Intended Audience :: Developers',
+               'License :: OSI Approved :: BSD License',
+               'Programming Language :: Python',
+               'Topic :: Software Development',
+               'Topic :: Scientific/Engineering',
+               'Operating System :: Microsoft :: Windows',
+               'Operating System :: POSIX',
+               'Operating System :: Unix',
+               'Operating System :: MacOS',
+               'Programming Language :: Python :: 3.5',
+               'Programming Language :: Python :: 3.6',
+               'Programming Language :: Python :: 3.7',
+               'Programming Language :: Python :: 3.8']
+
+
 setup(name='scikit-optimize',
       version=VERSION,
       description='Sequential model-based optimization toolbox.',
       long_description=open('README.rst').read(),
       url='https://scikit-optimize.github.io/',
-      license='BSD 3-clause "New" or "Revised License"',
+      license='BSD 3-clause',
       author='The scikit-optimize contributors',
+      classifiers=CLASSIFIERS,
       packages=['skopt', 'skopt.learning', 'skopt.optimizer', 'skopt.space',
                 'skopt.learning.gaussian_process'],
       install_requires=['joblib', 'pyaml', 'numpy', 'scipy>=0.14.0',
@@ -31,4 +48,5 @@
       extras_require={
         'plots':  ["matplotlib"]
         }
+
      )

From 81b6ac19d533c3e0a02d512b002a06e2a1f74604 Mon Sep 17 00:00:00 2001
From: holgern
Date: Tue, 18 Feb 2020 22:59:06 +0100
Subject: [PATCH 072/265] Give initial point generators the dimensions instead
 of n_dim

The inverse transformation is performed inside the InitialPointGenerators.
Adapt examples and doc.

---
 .../initial-sampling-method-integer.py        | 28 +++---
 examples/samples/initial-sampling-method.py   | 28 +++---
 skopt/optimizer/base.py                       |  4 -
 skopt/optimizer/dummy.py                      |  4 -
 skopt/optimizer/forest.py                     |  4 -
 skopt/optimizer/gbrt.py                       |  4 -
 skopt/optimizer/gp.py                         |  4 -
 skopt/optimizer/optimizer.py                  | 13 +--
 skopt/samples/base.py                         |  2 +-
 skopt/samples/halton.py                       | 21 ++++-
 skopt/samples/hammersly.py                    | 29 ++++--
 skopt/samples/lhs.py                          | 90 +++++++++++++------
 skopt/samples/sobol.py                        | 23 +++--
 skopt/space/space.py                          | 35 ++++++--
 skopt/tests/test_gp_opt.py                    |  7 ++
 skopt/tests/test_samples.py                   | 31 ++++---
 skopt/tests/test_space.py                     |  5 ++
 17 files changed, 203 insertions(+), 129 deletions(-)

diff --git a/examples/samples/initial-sampling-method-integer.py b/examples/samples/initial-sampling-method-integer.py
index 6dfbd746d..66467c5b5 100644
--- a/examples/samples/initial-sampling-method-integer.py
+++ b/examples/samples/initial-sampling-method-integer.py
@@ -68,8 +68,7 @@ def plot_searchspace(x, title):
 # -----

 sobol = Sobol()
-inv_initial_samples = sobol.generate(n_dim, n_samples)
-x = space.inverse_transform(inv_initial_samples)
+x = sobol.generate(space.dimensions, n_samples)
 plot_searchspace(x, 'Sobol')
 print("empty fields: %d" % (36 - np.size(np.unique(x, axis=0), 0)))
 pdist_data.append(pdist(x).flatten())
@@ -80,9 +79,8 @@ def plot_searchspace(x, title):
 # Classic latin hypercube sampling
 # --------------------------------

-lhs = Lhs(lhs_type="classic")
-inv_initial_samples = lhs.generate(n_dim, n_samples)
-x = space.inverse_transform(inv_initial_samples)
+lhs = Lhs(lhs_type="classic", criterion=None)
+x = lhs.generate(space.dimensions, n_samples)
 plot_searchspace(x, 'classic LHS')
 print("empty fields: %d" % (36 - np.size(np.unique(x, axis=0), 0)))
 pdist_data.append(pdist(x).flatten())
 x_label.append("lhs")
@@ -92,9
+90,8 @@ def plot_searchspace(x, title): # Centered latin hypercube sampling # --------------------------------- -lhs = Lhs(lhs_type="centered") -inv_initial_samples = lhs.generate(n_dim, n_samples) -x = space.inverse_transform(inv_initial_samples) +lhs = Lhs(lhs_type="centered", criterion=None) +x = lhs.generate(space.dimensions, n_samples) plot_searchspace(x, 'centered LHS') print("empty fields: %d" % (36 - np.size(np.unique(x, axis=0), 0))) pdist_data.append(pdist(x).flatten()) @@ -105,8 +102,7 @@ def plot_searchspace(x, title): # ------------------------------------ lhs = Lhs(criterion="maximin", iterations=1000) -inv_initial_samples = lhs.generate(n_dim, n_samples) -x = space.inverse_transform(inv_initial_samples) +x = lhs.generate(space.dimensions, n_samples) plot_searchspace(x, 'maximin LHS') print("empty fields: %d" % (36 - np.size(np.unique(x, axis=0), 0))) pdist_data.append(pdist(x).flatten()) @@ -117,8 +113,7 @@ def plot_searchspace(x, title): # ---------------------------------------- lhs = Lhs(criterion="correlation", iterations=1000) -inv_initial_samples = lhs.generate(n_dim, n_samples) -x = space.inverse_transform(inv_initial_samples) +x = lhs.generate(space.dimensions, n_samples) plot_searchspace(x, 'correlation LHS') print("empty fields: %d" % (36 - np.size(np.unique(x, axis=0), 0))) pdist_data.append(pdist(x).flatten()) @@ -129,8 +124,7 @@ def plot_searchspace(x, title): # ---------------------------------- lhs = Lhs(criterion="ratio", iterations=1000) -inv_initial_samples = lhs.generate(n_dim, n_samples) -x = space.inverse_transform(inv_initial_samples) +x = lhs.generate(space.dimensions, n_samples) plot_searchspace(x, 'ratio LHS') print("empty fields: %d" % (36 - np.size(np.unique(x, axis=0), 0))) pdist_data.append(pdist(x).flatten()) @@ -141,8 +135,7 @@ def plot_searchspace(x, title): # --------------- halton = Halton() -inv_initial_samples = halton.generate(n_dim, n_samples) -x = space.inverse_transform(inv_initial_samples) +x = halton.generate(space.dimensions, n_samples) plot_searchspace(x, 'Halton') print("empty fields: %d" % (36 - np.size(np.unique(x, axis=0), 0))) pdist_data.append(pdist(x).flatten()) @@ -153,8 +146,7 @@ def plot_searchspace(x, title): # ------------------ hammersly = Hammersly() -inv_initial_samples = hammersly.generate(n_dim, n_samples) -x = space.inverse_transform(inv_initial_samples) +x = hammersly.generate(space.dimensions, n_samples) plot_searchspace(x, 'Hammersly') print("empty fields: %d" % (36 - np.size(np.unique(x, axis=0), 0))) pdist_data.append(pdist(x).flatten()) diff --git a/examples/samples/initial-sampling-method.py b/examples/samples/initial-sampling-method.py index 822a80e3a..fde5b3209 100644 --- a/examples/samples/initial-sampling-method.py +++ b/examples/samples/initial-sampling-method.py @@ -67,8 +67,7 @@ def ploit_searchspace(x, title): # ----- sobol = Sobol() -inv_initial_samples = sobol.generate(n_dim, n_samples) -x = space.inverse_transform(inv_initial_samples) +x = sobol.generate(space.dimensions, n_samples) ploit_searchspace(x, 'Sobol') pdist_data.append(pdist(x).flatten()) x_label.append("sobol") @@ -78,9 +77,8 @@ def ploit_searchspace(x, title): # Classic Latin hypercube sampling # -------------------------------- -lhs = Lhs(lhs_type="classic") -inv_initial_samples = lhs.generate(n_dim, n_samples) -x = space.inverse_transform(inv_initial_samples) +lhs = Lhs(lhs_type="classic", criterion=None) +x = lhs.generate(space.dimensions, n_samples) ploit_searchspace(x, 'classic LHS') pdist_data.append(pdist(x).flatten()) 
x_label.append("lhs") @@ -89,9 +87,8 @@ def ploit_searchspace(x, title): # Centered Latin hypercube sampling # --------------------------------- -lhs = Lhs(lhs_type="centered") -inv_initial_samples = lhs.generate(n_dim, n_samples) -x = space.inverse_transform(inv_initial_samples) +lhs = Lhs(lhs_type="centered", criterion=None) +x = lhs.generate(space.dimensions, n_samples) ploit_searchspace(x, 'centered LHS') pdist_data.append(pdist(x).flatten()) x_label.append("center") @@ -101,8 +98,7 @@ def ploit_searchspace(x, title): # ------------------------------------ lhs = Lhs(criterion="maximin", iterations=1000) -inv_initial_samples = lhs.generate(n_dim, n_samples) -x = space.inverse_transform(inv_initial_samples) +x = lhs.generate(space.dimensions, n_samples) ploit_searchspace(x, 'maximin LHS') pdist_data.append(pdist(x).flatten()) x_label.append("maximin") @@ -112,8 +108,7 @@ def ploit_searchspace(x, title): # ---------------------------------------- lhs = Lhs(criterion="correlation", iterations=1000) -inv_initial_samples = lhs.generate(n_dim, n_samples) -x = space.inverse_transform(inv_initial_samples) +x = lhs.generate(space.dimensions, n_samples) ploit_searchspace(x, 'correlation LHS') pdist_data.append(pdist(x).flatten()) x_label.append("corr") @@ -123,8 +118,7 @@ def ploit_searchspace(x, title): # ---------------------------------- lhs = Lhs(criterion="ratio", iterations=1000) -inv_initial_samples = lhs.generate(n_dim, n_samples) -x = space.inverse_transform(inv_initial_samples) +x = lhs.generate(space.dimensions, n_samples) ploit_searchspace(x, 'ratio LHS') pdist_data.append(pdist(x).flatten()) x_label.append("ratio") @@ -134,8 +128,7 @@ def ploit_searchspace(x, title): # --------------- halton = Halton() -inv_initial_samples = halton.generate(n_dim, n_samples) -x = space.inverse_transform(inv_initial_samples) +x = halton.generate(space.dimensions, n_samples) ploit_searchspace(x, 'Halton') pdist_data.append(pdist(x).flatten()) x_label.append("halton") @@ -145,8 +138,7 @@ def ploit_searchspace(x, title): # ------------------ hammersly = Hammersly() -inv_initial_samples = hammersly.generate(n_dim, n_samples) -x = space.inverse_transform(inv_initial_samples) +x = hammersly.generate(space.dimensions, n_samples) ploit_searchspace(x, 'Hammersly') pdist_data.append(pdist(x).flatten()) x_label.append("hammersly") diff --git a/skopt/optimizer/base.py b/skopt/optimizer/base.py index 3f751259c..259dc5076 100644 --- a/skopt/optimizer/base.py +++ b/skopt/optimizer/base.py @@ -74,13 +74,9 @@ def base_minimize(func, dimensions, base_estimator, Sets a initial points generator. Can be either - "random" for uniform random numbers, - - "sobol" for a Sobol sequence, - - "halton" for a Halton sequence, - - "hammersly" for a Hammersly sequence, - - "lhs" for a latin hypercube sequence, acq_func : string, default=`"EI"` diff --git a/skopt/optimizer/dummy.py b/skopt/optimizer/dummy.py index f17837c10..6bf43caa9 100644 --- a/skopt/optimizer/dummy.py +++ b/skopt/optimizer/dummy.py @@ -40,13 +40,9 @@ def dummy_minimize(func, dimensions, n_calls=100, Sets a initial points generator. 
Can be either - "random" for uniform random numbers, - - "sobol" for a Sobol sequence, - - "halton" for a Halton sequence, - - "hammersly" for a Hammersly sequence, - - "lhs" for a latin hypercube sequence, x0 : list, list of lists or `None` diff --git a/skopt/optimizer/forest.py b/skopt/optimizer/forest.py index a44b42cbd..ad51ef72a 100644 --- a/skopt/optimizer/forest.py +++ b/skopt/optimizer/forest.py @@ -82,13 +82,9 @@ def forest_minimize(func, dimensions, base_estimator="ET", n_calls=100, Sets a initial points generator. Can be either - "random" for uniform random numbers, - - "sobol" for a Sobol sequence, - - "halton" for a Halton sequence, - - "hammersly" for a Hammersly sequence, - - "lhs" for a latin hypercube sequence, acq_func : string, default="LCB" diff --git a/skopt/optimizer/gbrt.py b/skopt/optimizer/gbrt.py index b0d9c11f7..18f51ce94 100644 --- a/skopt/optimizer/gbrt.py +++ b/skopt/optimizer/gbrt.py @@ -68,13 +68,9 @@ def gbrt_minimize(func, dimensions, base_estimator=None, Sets a initial points generator. Can be either - "random" for uniform random numbers, - - "sobol" for a Sobol sequence, - - "halton" for a Halton sequence, - - "hammersly" for a Hammersly sequence, - - "lhs" for a latin hypercube sequence, acq_func : string, default=`"LCB"` diff --git a/skopt/optimizer/gp.py b/skopt/optimizer/gp.py index 1bc37c0e3..a6091578d 100644 --- a/skopt/optimizer/gp.py +++ b/skopt/optimizer/gp.py @@ -94,13 +94,9 @@ def gp_minimize(func, dimensions, base_estimator=None, Sets a initial points generator. Can be either - "random" for uniform random numbers, - - "sobol" for a Sobol sequence, - - "halton" for a Halton sequence, - - "hammersly" for a Hammersly sequence, - - "lhs" for a latin hypercube sequence, acq_func : string, default=`"gp_hedge"` diff --git a/skopt/optimizer/optimizer.py b/skopt/optimizer/optimizer.py index 73296e47c..be22006a2 100644 --- a/skopt/optimizer/optimizer.py +++ b/skopt/optimizer/optimizer.py @@ -74,13 +74,9 @@ class Optimizer(object): Sets a initial points generator. 
Can be either - "random" for uniform random numbers, - - "sobol" for a Sobol sequence, - - "halton" for a Halton sequence, - - "hammersly" for a Hammersly sequence, - - "lhs" for a latin hypercube sequence acq_func : string, default=`"gp_hedge"` @@ -292,14 +288,9 @@ def __init__(self, dimensions, base_estimator="gp", "Unkown initial_point_generator: " + str(initial_point_generator) ) - inv_initial_samples = self._initial_point_generator.generate( - self.space.n_dims, n_initial_points, + self._initial_samples = self._initial_point_generator.generate( + self.space.dimensions, n_initial_points, random_state=self.rng.randint(0, np.iinfo(np.int32).max)) - transformer = self.space.get_transformer() - self.space.set_transformer("normalize") - self._initial_samples = self.space.inverse_transform( - inv_initial_samples) - self.space.set_transformer(transformer) # record categorical and non-categorical indices self._cat_inds = [] diff --git a/skopt/samples/base.py b/skopt/samples/base.py index 0bc23d8d1..7811328c3 100644 --- a/skopt/samples/base.py +++ b/skopt/samples/base.py @@ -1,4 +1,4 @@ class InitialPointGenerator(object): - def generate(self, n_dim, n_samples, random_state=None): + def generate(self, dimensions, n_samples, random_state=None): raise NotImplemented diff --git a/skopt/samples/halton.py b/skopt/samples/halton.py index 8d43f4262..964f1acdb 100644 --- a/skopt/samples/halton.py +++ b/skopt/samples/halton.py @@ -4,6 +4,7 @@ """ import numpy as np from .base import InitialPointGenerator +from ..space import Space from sklearn.utils import check_random_state @@ -36,13 +37,22 @@ def __init__(self, min_skip=-1, max_skip=-1, primes=None): self.min_skip = min_skip self.max_skip = max_skip - def generate(self, n_dim, n_samples, random_state=None): + def generate(self, dimensions, n_samples, random_state=None): """Creates samples from Halton set. Parameters ---------- - n_dim : int - The number of dimension + dimensions : list, shape (n_dims,) + List of search space dimensions. + Each search dimension can be defined either as + + - a `(lower_bound, upper_bound)` tuple (for `Real` or `Integer` + dimensions), + - a `(lower_bound, upper_bound, "prior")` tuple (for `Real` + dimensions), + - as a list of categories (for `Categorical` dimensions), or + - an instance of a `Dimension` object (`Real`, `Integer` or + `Categorical`). n_samples : int The order of the Halton sequence. Defines the number of samples. 
random_state : int, RandomState instance, or None (default) @@ -58,6 +68,9 @@ def generate(self, n_dim, n_samples, random_state=None): primes = [] else: primes = list(self.primes) + space = Space(dimensions) + n_dim = space.n_dims + space.set_transformer("normalize") if len(primes) < n_dim: prime_order = 10 * n_dim while len(primes) < n_dim: @@ -80,7 +93,7 @@ def generate(self, n_dim, n_samples, random_state=None): for dim_ in range(n_dim): out[dim_] = _van_der_corput_samples( indices, number_base=primes[dim_]) - return np.transpose(out) + return space.inverse_transform(np.transpose(out)) def _van_der_corput_samples(idx, number_base=2): diff --git a/skopt/samples/hammersly.py b/skopt/samples/hammersly.py index efb320bb7..0db6fba1f 100644 --- a/skopt/samples/hammersly.py +++ b/skopt/samples/hammersly.py @@ -4,6 +4,7 @@ """ import numpy as np from .halton import Halton +from ..space import Space from .base import InitialPointGenerator @@ -37,13 +38,22 @@ def __init__(self, min_skip=-1, max_skip=-1, primes=None): self.min_skip = min_skip self.max_skip = max_skip - def generate(self, n_dim, n_samples, random_state=None): + def generate(self, dimensions, n_samples, random_state=None): """Creates samples from Hammersly set. Parameters ---------- - n_dim : int - The number of dimension + dimensions : list, shape (n_dims,) + List of search space dimensions. + Each search dimension can be defined either as + + - a `(lower_bound, upper_bound)` tuple (for `Real` or `Integer` + dimensions), + - a `(lower_bound, upper_bound, "prior")` tuple (for `Real` + dimensions), + - as a list of categories (for `Categorical` dimensions), or + - an instance of a `Dimension` object (`Real`, `Integer` or + `Categorical`). n_samples : int The order of the Hammersley sequence. Defines the number of samples. @@ -59,13 +69,16 @@ def generate(self, n_dim, n_samples, random_state=None): halton = Halton(min_skip=self.min_skip, max_skip=self.max_skip, primes=self.primes) - + space = Space(dimensions) + n_dim = space.n_dims + space.set_transformer("normalize") if n_dim == 1: - return halton.generate(n_dim, n_samples, + return halton.generate(dimensions, n_samples, random_state=random_state) out = np.empty((n_dim, n_samples), dtype=float) - out[:n_dim - 1] = halton.generate(n_dim - 1, n_samples, - random_state=random_state).T + out[:n_dim - 1] = np.array(halton.generate( + [(0., 1.), ] * (n_dim - 1), n_samples, + random_state=random_state)).T out[n_dim - 1] = np.linspace(0, 1, n_samples + 2)[1:-1] - return out.T + return space.inverse_transform(out.T) diff --git a/skopt/samples/lhs.py b/skopt/samples/lhs.py index 98d448edb..d60163874 100644 --- a/skopt/samples/lhs.py +++ b/skopt/samples/lhs.py @@ -7,6 +7,7 @@ from sklearn.utils import check_random_state from scipy import spatial from ..utils import random_permute_matrix +from ..space import Space, Categorical from .base import InitialPointGenerator @@ -36,13 +37,23 @@ def __init__(self, lhs_type="classic", criterion="maximin", self.criterion = criterion self.iterations = iterations - def generate(self, n_dim, n_samples, random_state=None): + def generate(self, dimensions, n_samples, random_state=None): """Creates latin hypercube samples. Parameters ---------- - n_dim : int - The number of dimension + dimensions : list, shape (n_dims,) + List of search space dimensions. 
+ Each search dimension can be defined either as + + - a `(lower_bound, upper_bound)` tuple (for `Real` or `Integer` + dimensions), + - a `(lower_bound, upper_bound, "prior")` tuple (for `Real` + dimensions), + - as a list of categories (for `Categorical` dimensions), or + - an instance of a `Dimension` object (`Real`, `Integer` or + `Categorical`). + n_samples : int The order of the LHS sequence. Defines the number of samples. random_state : int, RandomState instance, or None (default) @@ -55,57 +66,80 @@ def generate(self, n_dim, n_samples, random_state=None): LHS set """ rng = check_random_state(random_state) - if self.criterion is None: - x = np.linspace(0, 1, n_samples + 1) - u = rng.rand(n_samples, n_dim) - h = np.zeros_like(u) - if self.lhs_type == "centered": - for j in range(n_dim): - h[:, j] = np.diff(x) / 2.0 + x[:n_samples] - elif self.lhs_type == "classic": - for j in range(n_dim): - h[:, j] = u[:, j] * np.diff(x) + x[:n_samples] - else: - raise ValueError("Wrong lhs_type. Got ".format(self.lhs_type)) - return random_permute_matrix(h, random_state=random_state) + space = Space(dimensions) + if space.is_partly_categorical: + space_cat_int = Space(dimensions) + space_cat_int.set_transformer("identity") + space_cat_int.set_transformer_by_type("label", Categorical) + n_dim = space.n_dims + space.set_transformer("normalize") + if self.criterion is None or n_samples == 1: + h = self._lhs_normalized(n_dim, n_samples, random_state) + return space.inverse_transform(h) else: - internal_lhs = Lhs(lhs_type=self.lhs_type, criterion=None) - h_opt = internal_lhs.generate(n_dim, n_samples, - random_state=random_state) + h_opt = self._lhs_normalized(n_dim, n_samples, random_state) + if self.criterion == "correlation": mincorr = np.inf for i in range(self.iterations): # Generate a random LHS - h = internal_lhs.generate(n_dim, n_samples, - random_state=random_state) - r = np.corrcoef(h.T) + h = self._lhs_normalized(n_dim, n_samples, random_state) + h = space.inverse_transform(h) + if space.is_partly_categorical: + h = space_cat_int.transform(h) + r = np.corrcoef(np.array(h).T) if np.max(np.abs(r[r != 1])) < mincorr: mincorr = np.max(np.abs(r - np.eye(r.shape[0]))) h_opt = h.copy() + if space.is_partly_categorical: + h_opt = space_cat_int.inverse_transform(h_opt) elif self.criterion == "maximin": maxdist = 0 # Maximize the minimum distance between points for i in range(self.iterations): - h = internal_lhs.generate(n_dim, n_samples, - random_state=random_state) - d = spatial.distance.pdist(h, 'euclidean') + h = self._lhs_normalized(n_dim, n_samples, random_state) + h = space.inverse_transform(h) + if space.is_partly_categorical: + h = space_cat_int.transform(h) + d = spatial.distance.pdist(np.array(h), 'euclidean') if maxdist < np.min(d): maxdist = np.min(d) h_opt = h.copy() + if space.is_partly_categorical: + h_opt = space_cat_int.inverse_transform(h_opt) elif self.criterion == "ratio": minratio = np.inf # Maximize the minimum distance between points for i in range(self.iterations): - h = internal_lhs.generate(n_dim, n_samples, - random_state=random_state) - p = spatial.distance.pdist(h, 'euclidean') + h = self._lhs_normalized(n_dim, n_samples, random_state) + h = space.inverse_transform(h) + if space.is_partly_categorical: + h = space_cat_int.transform(h) + p = spatial.distance.pdist(np.array(h), 'euclidean') ratio = np.max(p) / np.min(p) if minratio > ratio: minratio = ratio h_opt = h.copy() + if space.is_partly_categorical: + h_opt = space_cat_int.inverse_transform(h_opt) else: raise 
ValueError("Wrong criterion." "Got {}".format(self.criterion)) return h_opt + + def _lhs_normalized(self, n_dim, n_samples, random_state): + rng = check_random_state(random_state) + x = np.linspace(0, 1, n_samples + 1) + u = rng.rand(n_samples, n_dim) + h = np.zeros_like(u) + if self.lhs_type == "centered": + for j in range(n_dim): + h[:, j] = np.diff(x) / 2.0 + x[:n_samples] + elif self.lhs_type == "classic": + for j in range(n_dim): + h[:, j] = u[:, j] * np.diff(x) + x[:n_samples] + else: + raise ValueError("Wrong lhs_type. Got ".format(self.lhs_type)) + return random_permute_matrix(h, random_state=random_state) diff --git a/skopt/samples/sobol.py b/skopt/samples/sobol.py index ed52e16d1..680d724a1 100644 --- a/skopt/samples/sobol.py +++ b/skopt/samples/sobol.py @@ -19,6 +19,7 @@ import numpy as np from scipy.stats import norm from .base import InitialPointGenerator +from ..space import Space from sklearn.utils import check_random_state @@ -186,13 +187,22 @@ def init(self, dim_num): self.recipd = 1.0 / (2 * p2) self.lastq = np.zeros(dim_num) - def generate(self, n_dim, n_samples, random_state=None): + def generate(self, dimensions, n_samples, random_state=None): """Creates samples from Sobol set. Parameters ---------- - n_dim : int - The number of dimension + dimensions : list, shape (n_dims,) + List of search space dimensions. + Each search dimension can be defined either as + + - a `(lower_bound, upper_bound)` tuple (for `Real` or `Integer` + dimensions), + - a `(lower_bound, upper_bound, "prior")` tuple (for `Real` + dimensions), + - as a list of categories (for `Categorical` dimensions), or + - an instance of a `Dimension` object (`Real`, `Integer` or + `Categorical`). n_samples : int The order of the Sobol sequence. Defines the number of samples. random_state : int, RandomState instance, or None (default) @@ -205,6 +215,9 @@ def generate(self, n_dim, n_samples, random_state=None): Sobol set """ rng = check_random_state(random_state) + space = Space(dimensions) + n_dim = space.n_dims + space.set_transformer("normalize") r = np.full((n_samples, n_dim), np.nan) if self.min_skip == self.max_skip: seed = self.min_skip @@ -213,8 +226,8 @@ def generate(self, n_dim, n_samples, random_state=None): for j in range(n_samples): r[j, 0:n_dim], seed = self._sobol(n_dim, seed) if self.randomize: - return _random_shift(r, random_state) - return r + return space.inverse_transform(_random_shift(r, random_state)) + return space.inverse_transform(r) def _sobol(self, dim_num, seed): """Generates a new quasirandom Sobol vector with each call. diff --git a/skopt/space/space.py b/skopt/space/space.py index 9bd4ecd3d..1b7f49ea3 100644 --- a/skopt/space/space.py +++ b/skopt/space/space.py @@ -47,11 +47,12 @@ def check_dimension(dimension, transform=None): - an instance of a `Dimension` object (`Real`, `Integer` or `Categorical`). - transform : "identity", "normalize", "string", "onehot" optional + transform : "identity", "normalize", "string", "label", "onehot" optional - For `Categorical` dimensions, the following transformations are supported. - "onehot" (default) one-hot transformation of the original space. + - "label" integer transformation of the original space - "string" string transformation of the original space. - "identity" same as the original space. @@ -544,11 +545,13 @@ class Categorical(Dimension): Prior probabilities for each category. By default all categories are equally likely. 
- transform : "onehot", "string", "identity", default="onehot" + transform : "onehot", "string", "identity", "label", default="onehot" - "identity", the transformed space is the same as the original space. - "string", the transformed space is a string encoded representation of the original space. + - "label", the transformed space is a label encoded + representation (integer) of the original space. - "onehot", the transformed space is a one-hot encoded representation of the original space. @@ -580,19 +583,23 @@ def set_transformer(self, transform="onehot"): Parameters ---------- transform : str - Can be 'normalize', 'onehot', 'string' or 'identity' + Can be 'normalize', 'onehot', 'string', 'label', or 'identity' """ self.transform_ = transform - if transform not in ["identity", "onehot", "string", "normalize"]: - raise ValueError("Expected transform to be 'identity', 'string' or" - "'onehot' got {}".format(transform)) + if transform not in ["identity", "onehot", "string", "normalize", + "label"]: + raise ValueError("Expected transform to be 'identity', 'string'," + "'label' or 'onehot' got {}".format(transform)) if transform == "onehot": self.transformer = CategoricalEncoder() self.transformer.fit(self.categories) elif transform == "string": self.transformer = StringEncoder() self.transformer.fit(self.categories) + elif transform == "label": + self.transformer = LabelEncoder() + self.transformer.fit(self.categories) elif transform == "normalize": self.transformer = Pipeline( [LabelEncoder(list(self.categories)), @@ -859,6 +866,22 @@ def set_transformer(self, transform): else: self.dimensions[j].set_transformer(transform) + def set_transformer_by_type(self, transform, dim_type): + """Sets the transformer of `dim_type` objects to `transform` + + Parameters + ---------- + transform : str + Sets all transformer of type `dim_type` to `transform` + dim_type : type + Can be `skopt.space.Real`, `skopt.space.Integer` or + `skopt.space.Categorical` + """ + # Transform + for j in range(self.n_dims): + if isinstance(self.dimensions[j], dim_type): + self.dimensions[j].set_transformer(transform) + def get_transformer(self): """Returns all transformers as list""" transformer = [] diff --git a/skopt/tests/test_gp_opt.py b/skopt/tests/test_gp_opt.py index b09785a5b..49dae0cce 100644 --- a/skopt/tests/test_gp_opt.py +++ b/skopt/tests/test_gp_opt.py @@ -133,6 +133,9 @@ def objective(param_list): res = gp_minimize(objective, space, n_calls=12, random_state=1) assert res["x"] == ['1', 4, 1.0] + res = gp_minimize(objective, space, n_calls=12, random_state=1, + initial_point_generator="lhs") + assert res["x"] == ['1', 4, 1.0] def test_mixed_categoricals2(): @@ -149,3 +152,7 @@ def objective(param_list): res = gp_minimize(objective, space, n_calls=12, random_state=1) assert res["x"] == ['1', 4] + + res = gp_minimize(objective, space, n_calls=12, random_state=1, + initial_point_generator="lhs") + assert res["x"] == ['1', 4] diff --git a/skopt/tests/test_samples.py b/skopt/tests/test_samples.py index b0f8c2a32..3284e882d 100644 --- a/skopt/tests/test_samples.py +++ b/skopt/tests/test_samples.py @@ -25,18 +25,28 @@ @pytest.mark.fast_test def test_lhs_type(): lhs = Lhs(lhs_type="classic") - samples = lhs.generate(2, 200) + samples = lhs.generate([(0., 1.), ] * 2, 200) assert len(samples) == 200 assert len(samples[0]) == 2 lhs = Lhs(lhs_type="centered") - samples = lhs.generate(3, 3) + samples = lhs.generate([(0., 1.), ] * 3, 3) assert_almost_equal(np.sum(samples), 4.5) + samples = lhs.generate([("a", "b", "c")], 3) + 
assert samples[0][0] in ["a", "b", "c"] + + samples = lhs.generate([("a", "b", "c"), (0, 1)], 1) + assert samples[0][0] in ["a", "b", "c"] + assert samples[0][1] in [0, 1] + + samples = lhs.generate([("a", "b", "c"), (0, 1)], 3) + assert samples[0][0] in ["a", "b", "c"] + assert samples[0][1] in [0, 1] def test_lhs_criterion(): for criterion in ["maximin", "ratio", "correlation"]: lhs = Lhs(criterion=criterion, iterations=100) - samples = lhs.generate(2, 200) + samples = lhs.generate([(0., 1.), ] * 2, 200) assert len(samples) == 200 assert len(samples[0]) == 2 @@ -74,7 +84,8 @@ def test_sobol(): @pytest.mark.fast_test def test_generate(): sobol = Sobol(min_skip=1, max_skip=1) - x = sobol.generate(3, 3) + x = sobol.generate([(0., 1.), ] * 3, 3) + x = np.array(x) assert_array_equal(x[0, :], [0.5, 0.5, 0.5]) assert_array_equal(x[1, :], [0.75, 0.25, 0.75]) assert_array_equal(x[2, :], [0.25, 0.75, 0.25]) @@ -94,17 +105,17 @@ def test_van_der_corput(): @pytest.mark.fast_test def test_halton(): h = Halton() - x = h.generate(2, 3) + x = h.generate([(0., 1.), ] * 2, 3) y = np.array([[0.125, 0.625, 0.375], [0.4444, 0.7778, 0.2222]]).T assert_array_almost_equal(x, y, 1e-3) h = Halton() - x = h.generate(2, 4) + x = h.generate([(0., 1.), ] * 2, 4) y = np.array([[0.125, 0.625, 0.375, 0.875], [0.4444, 0.7778, 0.2222, 0.5556]]).T assert_array_almost_equal(x, y, 1e-3) - samples = h.generate(2, 200) + samples = h.generate([(0., 1.), ] * 2, 200) assert len(samples) == 200 assert len(samples[0]) == 2 @@ -112,14 +123,14 @@ def test_halton(): @pytest.mark.fast_test def test_hammersly(): h = Hammersly() - x = h.generate(2, 3) + x = h.generate([(0., 1.), ] * 2, 3) y = np.array([[0.75, 0.125, 0.625], [0.25, 0.5, 0.75]]).T assert_almost_equal(x, y) - x = h.generate(2, 4) + x = h.generate([(0., 1.), ] * 2, 4) y = np.array([[0.75, 0.125, 0.625, 0.375], [0.2, 0.4, 0.6, 0.8]]).T assert_almost_equal(x, y) - samples = h.generate(2, 200) + samples = h.generate([(0., 1.), ] * 2, 200) assert len(samples) == 200 assert len(samples[0]) == 2 diff --git a/skopt/tests/test_space.py b/skopt/tests/test_space.py index de058436a..0b09788d2 100644 --- a/skopt/tests/test_space.py +++ b/skopt/tests/test_space.py @@ -378,6 +378,11 @@ def test_set_get_transformer(): space.set_transformer("normalize") transformer = space.get_transformer() assert_array_equal(["normalize"] * 5, transformer) + space.set_transformer(transformer) + assert_array_equal(transformer, space.get_transformer()) + + space.set_transformer_by_type("label", Categorical) + assert space.dimensions[2].transform(["a"]) == [0] @pytest.mark.fast_test From 85317dad0531c04e41c94d30e0667032f7d17aff Mon Sep 17 00:00:00 2001 From: holgern Date: Wed, 19 Feb 2020 00:53:51 +0100 Subject: [PATCH 073/265] Revert changes to space.dimensions objects Improve unit tests --- skopt/optimizer/optimizer.py | 2 ++ skopt/samples/halton.py | 5 ++++- skopt/samples/hammersly.py | 20 ++++++++++-------- skopt/samples/lhs.py | 40 +++++++++++++++++++++++++----------- skopt/samples/sobol.py | 7 +++++-- skopt/tests/test_gp_opt.py | 30 +++++++++++++++++---------- 6 files changed, 70 insertions(+), 34 deletions(-) diff --git a/skopt/optimizer/optimizer.py b/skopt/optimizer/optimizer.py index be22006a2..cb0d3dd78 100644 --- a/skopt/optimizer/optimizer.py +++ b/skopt/optimizer/optimizer.py @@ -288,9 +288,11 @@ def __init__(self, dimensions, base_estimator="gp", "Unkown initial_point_generator: " + str(initial_point_generator) ) + transformer = self.space.get_transformer() self._initial_samples = 
self._initial_point_generator.generate( self.space.dimensions, n_initial_points, random_state=self.rng.randint(0, np.iinfo(np.int32).max)) + self.space.set_transformer(transformer) # record categorical and non-categorical indices self._cat_inds = [] diff --git a/skopt/samples/halton.py b/skopt/samples/halton.py index 964f1acdb..e8ea9fc8d 100644 --- a/skopt/samples/halton.py +++ b/skopt/samples/halton.py @@ -70,6 +70,7 @@ def generate(self, dimensions, n_samples, random_state=None): primes = list(self.primes) space = Space(dimensions) n_dim = space.n_dims + transformer = space.get_transformer() space.set_transformer("normalize") if len(primes) < n_dim: prime_order = 10 * n_dim @@ -93,7 +94,9 @@ def generate(self, dimensions, n_samples, random_state=None): for dim_ in range(n_dim): out[dim_] = _van_der_corput_samples( indices, number_base=primes[dim_]) - return space.inverse_transform(np.transpose(out)) + out = space.inverse_transform(np.transpose(out)) + space.set_transformer(transformer) + return out def _van_der_corput_samples(idx, number_base=2): diff --git a/skopt/samples/hammersly.py b/skopt/samples/hammersly.py index 0db6fba1f..b33273869 100644 --- a/skopt/samples/hammersly.py +++ b/skopt/samples/hammersly.py @@ -71,14 +71,18 @@ def generate(self, dimensions, n_samples, random_state=None): primes=self.primes) space = Space(dimensions) n_dim = space.n_dims + transformer = space.get_transformer() space.set_transformer("normalize") if n_dim == 1: - return halton.generate(dimensions, n_samples, - random_state=random_state) - out = np.empty((n_dim, n_samples), dtype=float) - out[:n_dim - 1] = np.array(halton.generate( - [(0., 1.), ] * (n_dim - 1), n_samples, - random_state=random_state)).T + out = halton.generate(dimensions, n_samples, + random_state=random_state) + else: + out = np.empty((n_dim, n_samples), dtype=float) + out[:n_dim - 1] = np.array(halton.generate( + [(0., 1.), ] * (n_dim - 1), n_samples, + random_state=random_state)).T - out[n_dim - 1] = np.linspace(0, 1, n_samples + 2)[1:-1] - return space.inverse_transform(out.T) + out[n_dim - 1] = np.linspace(0, 1, n_samples + 2)[1:-1] + out = space.inverse_transform(out.T) + space.set_transformer(transformer) + return out diff --git a/skopt/samples/lhs.py b/skopt/samples/lhs.py index d60163874..9704dc86c 100644 --- a/skopt/samples/lhs.py +++ b/skopt/samples/lhs.py @@ -67,66 +67,82 @@ def generate(self, dimensions, n_samples, random_state=None): """ rng = check_random_state(random_state) space = Space(dimensions) + transformer = space.get_transformer() if space.is_partly_categorical: - space_cat_int = Space(dimensions) - space_cat_int.set_transformer("identity") - space_cat_int.set_transformer_by_type("label", Categorical) + space.set_transformer("identity") + space.set_transformer_by_type("label", Categorical) + trans_cat_int = space.get_transformer() n_dim = space.n_dims space.set_transformer("normalize") + trans_normalize = space.get_transformer() if self.criterion is None or n_samples == 1: h = self._lhs_normalized(n_dim, n_samples, random_state) - return space.inverse_transform(h) + h = space.inverse_transform(h) + space.set_transformer(transformer) + return h else: h_opt = self._lhs_normalized(n_dim, n_samples, random_state) + space.set_transformer(trans_normalize) + h_opt = space.inverse_transform(h_opt) if self.criterion == "correlation": mincorr = np.inf for i in range(self.iterations): # Generate a random LHS h = self._lhs_normalized(n_dim, n_samples, random_state) + space.set_transformer(trans_normalize) h = 
space.inverse_transform(h) if space.is_partly_categorical: - h = space_cat_int.transform(h) + space.set_transformer(trans_cat_int) + h = space.transform(h) r = np.corrcoef(np.array(h).T) if np.max(np.abs(r[r != 1])) < mincorr: mincorr = np.max(np.abs(r - np.eye(r.shape[0]))) h_opt = h.copy() if space.is_partly_categorical: - h_opt = space_cat_int.inverse_transform(h_opt) + space.set_transformer(trans_cat_int) + h_opt = space.inverse_transform(h_opt) elif self.criterion == "maximin": maxdist = 0 # Maximize the minimum distance between points for i in range(self.iterations): h = self._lhs_normalized(n_dim, n_samples, random_state) + space.set_transformer(trans_normalize) h = space.inverse_transform(h) if space.is_partly_categorical: - h = space_cat_int.transform(h) + space.set_transformer(trans_cat_int) + h = space.transform(h) d = spatial.distance.pdist(np.array(h), 'euclidean') - if maxdist < np.min(d): + if maxdist > np.min(d): maxdist = np.min(d) h_opt = h.copy() if space.is_partly_categorical: - h_opt = space_cat_int.inverse_transform(h_opt) + space.set_transformer(trans_cat_int) + h_opt = space.inverse_transform(h_opt) elif self.criterion == "ratio": minratio = np.inf # Maximize the minimum distance between points for i in range(self.iterations): h = self._lhs_normalized(n_dim, n_samples, random_state) + space.set_transformer(trans_normalize) h = space.inverse_transform(h) if space.is_partly_categorical: - h = space_cat_int.transform(h) + space.set_transformer(trans_cat_int) + h = space.transform(h) p = spatial.distance.pdist(np.array(h), 'euclidean') ratio = np.max(p) / np.min(p) - if minratio > ratio: + if minratio < ratio: minratio = ratio h_opt = h.copy() if space.is_partly_categorical: - h_opt = space_cat_int.inverse_transform(h_opt) + space.set_transformer(trans_cat_int) + h_opt = space.inverse_transform(h_opt) else: raise ValueError("Wrong criterion." "Got {}".format(self.criterion)) + space.set_transformer(transformer) return h_opt def _lhs_normalized(self, n_dim, n_samples, random_state): diff --git a/skopt/samples/sobol.py b/skopt/samples/sobol.py index 680d724a1..2d4cc6502 100644 --- a/skopt/samples/sobol.py +++ b/skopt/samples/sobol.py @@ -217,6 +217,7 @@ def generate(self, dimensions, n_samples, random_state=None): rng = check_random_state(random_state) space = Space(dimensions) n_dim = space.n_dims + transformer = space.get_transformer() space.set_transformer("normalize") r = np.full((n_samples, n_dim), np.nan) if self.min_skip == self.max_skip: @@ -226,8 +227,10 @@ def generate(self, dimensions, n_samples, random_state=None): for j in range(n_samples): r[j, 0:n_dim], seed = self._sobol(n_dim, seed) if self.randomize: - return space.inverse_transform(_random_shift(r, random_state)) - return space.inverse_transform(r) + r = space.inverse_transform(_random_shift(r, random_state)) + r = space.inverse_transform(r) + space.set_transformer(transformer) + return r def _sobol(self, dim_num, seed): """Generates a new quasirandom Sobol vector with each call. 
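To make the new calling convention concrete, here is a minimal usage sketch of the reworked `generate()` API (not part of the patch itself): samplers now receive the search-space dimensions directly and return points in the original space, so the explicit `space.inverse_transform()` round-trip from the old examples is gone. `Sobol`, `Space`, and the `min_skip`/`max_skip` arguments are taken from the diffs above; the bounds and sample count are illustrative only, and note that `skopt.samples` is renamed to `skopt.sampler` in a later patch of this series.

from skopt.samples import Sobol   # becomes skopt.sampler after the rename patch
from skopt.space import Space

# Define the search space once; generate() now takes the dimensions directly.
space = Space([(0., 1.), (0., 1.), (0., 1.)])

# min_skip == max_skip pins the starting seed, as in test_generate() above.
sobol = Sobol(min_skip=1, max_skip=1)

# Points come back in the original space -- no inverse_transform() needed.
x = sobol.generate(space.dimensions, 3)
print(x[0])   # [0.5, 0.5, 0.5] for the first point of this seeded sequence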
diff --git a/skopt/tests/test_gp_opt.py b/skopt/tests/test_gp_opt.py index 49dae0cce..f39d09c8c 100644 --- a/skopt/tests/test_gp_opt.py +++ b/skopt/tests/test_gp_opt.py @@ -12,16 +12,18 @@ def check_minimize(func, y_opt, bounds, acq_optimizer, acq_func, - margin, n_calls, n_random_starts=10): + margin, n_calls, init_gen="random", n_random_starts=10): r = gp_minimize(func, bounds, acq_optimizer=acq_optimizer, acq_func=acq_func, n_random_starts=n_random_starts, n_calls=n_calls, random_state=1, + initial_point_generator=init_gen, noise=1e-10) assert r.fun < y_opt + margin SEARCH = ["sampling", "lbfgs"] ACQUISITION = ["LCB", "EI"] +INITGEN = ["random", "lhs", "halton", "hammersly", "sobol"] @pytest.mark.slow_test @@ -32,6 +34,15 @@ def test_gp_minimize_bench1(search, acq): [(-2.0, 2.0)], search, acq, 0.05, 20) +@pytest.mark.slow_test +@pytest.mark.parametrize("search", ["sampling"]) +@pytest.mark.parametrize("acq", ["LCB"]) +@pytest.mark.parametrize("initgen", INITGEN) +def test_gp_minimize_bench1_initgen(search, acq, initgen): + check_minimize(bench1, 0., + [(-2.0, 2.0)], search, acq, 0.05, 20, initgen) + + @pytest.mark.slow_test @pytest.mark.parametrize("search", SEARCH) @pytest.mark.parametrize("acq", ACQUISITION) @@ -116,7 +127,8 @@ def f(params): assert res.x_iters[0][0] == dims[0][0] -def test_mixed_categoricals(): +@pytest.mark.parametrize("initgen", INITGEN) +def test_mixed_categoricals(initgen): space = Space([ Categorical(name="x", categories=["1", "2", "3"]), @@ -131,14 +143,13 @@ def objective(param_list): loss = int(x) + y * z return loss - res = gp_minimize(objective, space, n_calls=12, random_state=1) - assert res["x"] == ['1', 4, 1.0] res = gp_minimize(objective, space, n_calls=12, random_state=1, - initial_point_generator="lhs") - assert res["x"] == ['1', 4, 1.0] + initial_point_generator=initgen) + assert res["x"] in [['1', 4, 1.0], ['2', 4, 1.0]] -def test_mixed_categoricals2(): +@pytest.mark.parametrize("initgen", INITGEN) +def test_mixed_categoricals2(initgen): space = Space([ Categorical(name="x", categories=["1", "2", "3"]), Categorical(name="y", categories=[4, 5, 6]) @@ -150,9 +161,6 @@ def objective(param_list): loss = int(x) + y return loss - res = gp_minimize(objective, space, n_calls=12, random_state=1) - assert res["x"] == ['1', 4] - res = gp_minimize(objective, space, n_calls=12, random_state=1, - initial_point_generator="lhs") + initial_point_generator=initgen) assert res["x"] == ['1', 4] From 198ddde419c281d5ed698eee0a8f5874cd30e444 Mon Sep 17 00:00:00 2001 From: holgern Date: Wed, 19 Feb 2020 11:12:08 +0100 Subject: [PATCH 074/265] Fix random_state propagation Fix normalize in lhs generation --- .../initial-sampling-method-integer.py | 13 ++--- examples/samples/initial-sampling-method.py | 35 ++++++------ examples/samples/sampling_comparison.py | 1 - skopt/samples/hammersly.py | 7 ++- skopt/samples/lhs.py | 56 +++++-------------- skopt/samples/sobol.py | 2 +- skopt/tests/test_samples.py | 30 +++++++++- 7 files changed, 72 insertions(+), 72 deletions(-) diff --git a/examples/samples/initial-sampling-method-integer.py b/examples/samples/initial-sampling-method-integer.py index 66467c5b5..f32c631d7 100644 --- a/examples/samples/initial-sampling-method-integer.py +++ b/examples/samples/initial-sampling-method-integer.py @@ -24,7 +24,7 @@ print(__doc__) import numpy as np -np.random.seed(123) +np.random.seed(1234) import matplotlib.pyplot as plt from skopt.space import Space from skopt.samples import Sobol @@ -47,9 +47,8 @@ def plot_searchspace(x, title): 
plt.title(title) ax.grid(True) -n_dim = 2 -n_samples = 36 +n_samples = 10 space = Space([(0, 5), (0, 5)]) space.set_transformer("normalize") @@ -63,6 +62,7 @@ def plot_searchspace(x, title): print("empty fields: %d" % (36 - np.size(np.unique(x, axis=0), 0))) pdist_data.append(pdist(x).flatten()) x_label.append("random") + ############################################################################# # Sobol # ----- @@ -74,7 +74,6 @@ def plot_searchspace(x, title): pdist_data.append(pdist(x).flatten()) x_label.append("sobol") - ############################################################################# # Classic latin hypercube sampling # -------------------------------- @@ -101,7 +100,7 @@ def plot_searchspace(x, title): # Maximin optimized hypercube sampling # ------------------------------------ -lhs = Lhs(criterion="maximin", iterations=1000) +lhs = Lhs(criterion="maximin", iterations=10000) x = lhs.generate(space.dimensions, n_samples) plot_searchspace(x, 'maximin LHS') print("empty fields: %d" % (36 - np.size(np.unique(x, axis=0), 0))) @@ -112,7 +111,7 @@ def plot_searchspace(x, title): # Correlation optimized hypercube sampling # ---------------------------------------- -lhs = Lhs(criterion="correlation", iterations=1000) +lhs = Lhs(criterion="correlation", iterations=10000) x = lhs.generate(space.dimensions, n_samples) plot_searchspace(x, 'correlation LHS') print("empty fields: %d" % (36 - np.size(np.unique(x, axis=0), 0))) @@ -123,7 +122,7 @@ def plot_searchspace(x, title): # Ratio optimized hypercube sampling # ---------------------------------- -lhs = Lhs(criterion="ratio", iterations=1000) +lhs = Lhs(criterion="ratio", iterations=10000) x = lhs.generate(space.dimensions, n_samples) plot_searchspace(x, 'ratio LHS') print("empty fields: %d" % (36 - np.size(np.unique(x, axis=0), 0))) diff --git a/examples/samples/initial-sampling-method.py b/examples/samples/initial-sampling-method.py index fde5b3209..858bad131 100644 --- a/examples/samples/initial-sampling-method.py +++ b/examples/samples/initial-sampling-method.py @@ -36,10 +36,10 @@ ############################################################################# -def ploit_searchspace(x, title): +def plot_searchspace(x, title): fig, ax = plt.subplots() plt.plot(np.array(x)[:, 0], np.array(x)[:, 1], 'bo', label='samples') - plt.plot(np.array(x)[:, 0], np.array(x)[:, 1], 'bo', markersize=40, alpha=0.5) + plt.plot(np.array(x)[:, 0], np.array(x)[:, 1], 'bo', markersize=80, alpha=0.5) # ax.legend(loc="best", numpoints=1) ax.set_xlabel("X1") ax.set_xlim([-5, 10]) @@ -47,8 +47,7 @@ def ploit_searchspace(x, title): ax.set_ylim([0, 15]) plt.title(title) -n_dim = 2 -n_samples = 40 +n_samples = 10 space = Space([(-5., 10.), (0., 15.)]) space.set_transformer("normalize") @@ -57,29 +56,29 @@ def ploit_searchspace(x, title): # Random sampling # --------------- x = space.rvs(n_samples) -ploit_searchspace(x, "Random samples") +plot_searchspace(x, "Random samples") pdist_data = [] x_label = [] pdist_data.append(pdist(x).flatten()) x_label.append("random") + ############################################################################# # Sobol # ----- sobol = Sobol() x = sobol.generate(space.dimensions, n_samples) -ploit_searchspace(x, 'Sobol') +plot_searchspace(x, 'Sobol') pdist_data.append(pdist(x).flatten()) x_label.append("sobol") - ############################################################################# # Classic Latin hypercube sampling # -------------------------------- lhs = Lhs(lhs_type="classic", criterion=None) x = 
lhs.generate(space.dimensions, n_samples) -ploit_searchspace(x, 'classic LHS') +plot_searchspace(x, 'classic LHS') pdist_data.append(pdist(x).flatten()) x_label.append("lhs") @@ -89,7 +88,7 @@ def ploit_searchspace(x, title): lhs = Lhs(lhs_type="centered", criterion=None) x = lhs.generate(space.dimensions, n_samples) -ploit_searchspace(x, 'centered LHS') +plot_searchspace(x, 'centered LHS') pdist_data.append(pdist(x).flatten()) x_label.append("center") @@ -97,9 +96,9 @@ def ploit_searchspace(x, title): # Maximin optimized hypercube sampling # ------------------------------------ -lhs = Lhs(criterion="maximin", iterations=1000) +lhs = Lhs(criterion="maximin", iterations=10000) x = lhs.generate(space.dimensions, n_samples) -ploit_searchspace(x, 'maximin LHS') +plot_searchspace(x, 'maximin LHS') pdist_data.append(pdist(x).flatten()) x_label.append("maximin") @@ -107,9 +106,9 @@ def ploit_searchspace(x, title): # Correlation optimized hypercube sampling # ---------------------------------------- -lhs = Lhs(criterion="correlation", iterations=1000) +lhs = Lhs(criterion="correlation", iterations=10000) x = lhs.generate(space.dimensions, n_samples) -ploit_searchspace(x, 'correlation LHS') +plot_searchspace(x, 'correlation LHS') pdist_data.append(pdist(x).flatten()) x_label.append("corr") @@ -117,9 +116,9 @@ def ploit_searchspace(x, title): # Ratio optimized hypercube sampling # ---------------------------------- -lhs = Lhs(criterion="ratio", iterations=1000) +lhs = Lhs(criterion="ratio", iterations=10000) x = lhs.generate(space.dimensions, n_samples) -ploit_searchspace(x, 'ratio LHS') +plot_searchspace(x, 'ratio LHS') pdist_data.append(pdist(x).flatten()) x_label.append("ratio") @@ -129,7 +128,7 @@ def ploit_searchspace(x, title): halton = Halton() x = halton.generate(space.dimensions, n_samples) -ploit_searchspace(x, 'Halton') +plot_searchspace(x, 'Halton') pdist_data.append(pdist(x).flatten()) x_label.append("halton") @@ -139,7 +138,7 @@ def ploit_searchspace(x, title): hammersly = Hammersly() x = hammersly.generate(space.dimensions, n_samples) -ploit_searchspace(x, 'Hammersly') +plot_searchspace(x, 'Hammersly') pdist_data.append(pdist(x).flatten()) x_label.append("hammersly") @@ -156,4 +155,4 @@ def ploit_searchspace(x, title): plt.grid(True) plt.ylabel("pdist") _ = ax.set_ylim(0, 12) -_ = ax.set_xticklabels(x_label, rotation=45, fontsize=8) \ No newline at end of file +_ = ax.set_xticklabels(x_label, rotation=45, fontsize=8) diff --git a/examples/samples/sampling_comparison.py b/examples/samples/sampling_comparison.py index 9bc9d44aa..d027ef366 100644 --- a/examples/samples/sampling_comparison.py +++ b/examples/samples/sampling_comparison.py @@ -46,7 +46,6 @@ def branin(x, noise_level=0.): ############################################################################# -from scipy.optimize import OptimizeResult from matplotlib.pyplot import cm import time from skopt import gp_minimize, forest_minimize, dummy_minimize diff --git a/skopt/samples/hammersly.py b/skopt/samples/hammersly.py index b33273869..9f91c73f8 100644 --- a/skopt/samples/hammersly.py +++ b/skopt/samples/hammersly.py @@ -6,6 +6,7 @@ from .halton import Halton from ..space import Space from .base import InitialPointGenerator +from sklearn.utils import check_random_state class Hammersly(InitialPointGenerator): @@ -66,7 +67,7 @@ def generate(self, dimensions, n_samples, random_state=None): np.array, shape=(n_dim, n_samples) Hammersley set """ - + rng = check_random_state(random_state) halton = Halton(min_skip=self.min_skip, 
max_skip=self.max_skip, primes=self.primes) space = Space(dimensions) @@ -75,12 +76,12 @@ def generate(self, dimensions, n_samples, random_state=None): space.set_transformer("normalize") if n_dim == 1: out = halton.generate(dimensions, n_samples, - random_state=random_state) + random_state=rng) else: out = np.empty((n_dim, n_samples), dtype=float) out[:n_dim - 1] = np.array(halton.generate( [(0., 1.), ] * (n_dim - 1), n_samples, - random_state=random_state)).T + random_state=rng)).T out[n_dim - 1] = np.linspace(0, 1, n_samples + 2)[1:-1] out = space.inverse_transform(out.T) diff --git a/skopt/samples/lhs.py b/skopt/samples/lhs.py index 9704dc86c..416a90770 100644 --- a/skopt/samples/lhs.py +++ b/skopt/samples/lhs.py @@ -68,77 +68,51 @@ def generate(self, dimensions, n_samples, random_state=None): rng = check_random_state(random_state) space = Space(dimensions) transformer = space.get_transformer() - if space.is_partly_categorical: - space.set_transformer("identity") - space.set_transformer_by_type("label", Categorical) - trans_cat_int = space.get_transformer() n_dim = space.n_dims space.set_transformer("normalize") - trans_normalize = space.get_transformer() if self.criterion is None or n_samples == 1: - h = self._lhs_normalized(n_dim, n_samples, random_state) + h = self._lhs_normalized(n_dim, n_samples, rng) h = space.inverse_transform(h) space.set_transformer(transformer) return h else: - h_opt = self._lhs_normalized(n_dim, n_samples, random_state) - space.set_transformer(trans_normalize) + h_opt = self._lhs_normalized(n_dim, n_samples, rng) h_opt = space.inverse_transform(h_opt) - if self.criterion == "correlation": mincorr = np.inf for i in range(self.iterations): # Generate a random LHS - h = self._lhs_normalized(n_dim, n_samples, random_state) - space.set_transformer(trans_normalize) - h = space.inverse_transform(h) - if space.is_partly_categorical: - space.set_transformer(trans_cat_int) - h = space.transform(h) + h = self._lhs_normalized(n_dim, n_samples, rng) r = np.corrcoef(np.array(h).T) if np.max(np.abs(r[r != 1])) < mincorr: mincorr = np.max(np.abs(r - np.eye(r.shape[0]))) h_opt = h.copy() - if space.is_partly_categorical: - space.set_transformer(trans_cat_int) - h_opt = space.inverse_transform(h_opt) - + h_opt = space.inverse_transform(h_opt) elif self.criterion == "maximin": maxdist = 0 # Maximize the minimum distance between points for i in range(self.iterations): - h = self._lhs_normalized(n_dim, n_samples, random_state) - space.set_transformer(trans_normalize) - h = space.inverse_transform(h) - if space.is_partly_categorical: - space.set_transformer(trans_cat_int) - h = space.transform(h) + h = self._lhs_normalized(n_dim, n_samples, rng) d = spatial.distance.pdist(np.array(h), 'euclidean') - if maxdist > np.min(d): + if maxdist < np.min(d): maxdist = np.min(d) h_opt = h.copy() - if space.is_partly_categorical: - space.set_transformer(trans_cat_int) - h_opt = space.inverse_transform(h_opt) + h_opt = space.inverse_transform(h_opt) elif self.criterion == "ratio": minratio = np.inf # Maximize the minimum distance between points for i in range(self.iterations): - h = self._lhs_normalized(n_dim, n_samples, random_state) - space.set_transformer(trans_normalize) - h = space.inverse_transform(h) - if space.is_partly_categorical: - space.set_transformer(trans_cat_int) - h = space.transform(h) + h = self._lhs_normalized(n_dim, n_samples, rng) p = spatial.distance.pdist(np.array(h), 'euclidean') - ratio = np.max(p) / np.min(p) - if minratio < ratio: + if np.min(p) == 0: + ratio = 
np.max(p) / 1e-8 + else: + ratio = np.max(p) / np.min(p) + if minratio > ratio: minratio = ratio h_opt = h.copy() - if space.is_partly_categorical: - space.set_transformer(trans_cat_int) - h_opt = space.inverse_transform(h_opt) + h_opt = space.inverse_transform(h_opt) else: raise ValueError("Wrong criterion." "Got {}".format(self.criterion)) @@ -158,4 +132,4 @@ def _lhs_normalized(self, n_dim, n_samples, random_state): h[:, j] = u[:, j] * np.diff(x) + x[:n_samples] else: raise ValueError("Wrong lhs_type. Got ".format(self.lhs_type)) - return random_permute_matrix(h, random_state=random_state) + return random_permute_matrix(h, random_state=rng) diff --git a/skopt/samples/sobol.py b/skopt/samples/sobol.py index 2d4cc6502..d56110a68 100644 --- a/skopt/samples/sobol.py +++ b/skopt/samples/sobol.py @@ -227,7 +227,7 @@ def generate(self, dimensions, n_samples, random_state=None): for j in range(n_samples): r[j, 0:n_dim], seed = self._sobol(n_dim, seed) if self.randomize: - r = space.inverse_transform(_random_shift(r, random_state)) + r = space.inverse_transform(_random_shift(r, rng)) r = space.inverse_transform(r) space.set_transformer(transformer) return r diff --git a/skopt/tests/test_samples.py b/skopt/tests/test_samples.py index 3284e882d..c1d708413 100644 --- a/skopt/tests/test_samples.py +++ b/skopt/tests/test_samples.py @@ -10,7 +10,7 @@ from numpy.testing import assert_array_equal from numpy.testing import assert_equal from numpy.testing import assert_raises_regex - +from scipy import spatial from skopt import Optimizer from skopt.space import Space from skopt.space import Real @@ -51,6 +51,34 @@ def test_lhs_criterion(): assert len(samples[0]) == 2 +def test_lhs_pdist(): + n_dim = 2 + n_samples = 20 + lhs = Lhs() + + h = lhs._lhs_normalized(n_dim, n_samples, 0) + d_classic = spatial.distance.pdist(np.array(h), 'euclidean') + lhs = Lhs(criterion="maximin", iterations=100) + h = lhs.generate([(0., 1.), ] * n_dim, n_samples, random_state=0) + d = spatial.distance.pdist(np.array(h), 'euclidean') + assert np.min(d) > np.min(d_classic) + + +def test_lhs_random_state(): + n_dim = 2 + n_samples = 20 + lhs = Lhs() + + h = lhs._lhs_normalized(n_dim, n_samples, 0) + h2 = lhs._lhs_normalized(n_dim, n_samples, 0) + assert_array_equal(h, h2) + for criterion in ["maximin", "ratio", "correlation"]: + lhs = Lhs(criterion=criterion, iterations=100) + h = lhs.generate([(0., 1.), ] * n_dim, n_samples, random_state=0) + h2 = lhs.generate([(0., 1.), ] * n_dim, n_samples, random_state=0) + assert_array_equal(h, h2) + + @pytest.mark.fast_test def test_bit(): X = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] From bc37008454d8591a352e65e068f5e0f633f3b733 Mon Sep 17 00:00:00 2001 From: holgern Date: Wed, 19 Feb 2020 11:47:01 +0100 Subject: [PATCH 075/265] Rename samples module to sampler --- doc/modules/classes.rst | 18 +++++++++--------- doc/modules/sampler.rst | 6 ++++++ doc/modules/samples.rst | 6 ------ doc/themes/scikit-learn-modern/javascript.html | 2 +- examples/sampler/README.txt | 6 ++++++ .../initial-sampling-method-integer.py | 8 ++++---- .../initial-sampling-method.py | 8 ++++---- .../sampling_comparison.py | 0 examples/samples/README.txt | 6 ------ setup.py | 2 +- skopt/__init__.py | 2 +- skopt/optimizer/optimizer.py | 5 +---- skopt/{samples => sampler}/__init__.py | 0 skopt/{samples => sampler}/base.py | 0 skopt/{samples => sampler}/halton.py | 0 skopt/{samples => sampler}/hammersly.py | 0 skopt/{samples => sampler}/lhs.py | 0 skopt/{samples => sampler}/sobol.py | 0 skopt/tests/test_gp_opt.py | 4 ++-- 
skopt/tests/test_samples.py | 6 +++--- 20 files changed, 38 insertions(+), 41 deletions(-) create mode 100644 doc/modules/sampler.rst delete mode 100644 doc/modules/samples.rst create mode 100644 examples/sampler/README.txt rename examples/{samples => sampler}/initial-sampling-method-integer.py (97%) rename examples/{samples => sampler}/initial-sampling-method.py (97%) rename examples/{samples => sampler}/sampling_comparison.py (100%) delete mode 100644 examples/samples/README.txt rename skopt/{samples => sampler}/__init__.py (100%) rename skopt/{samples => sampler}/base.py (100%) rename skopt/{samples => sampler}/halton.py (100%) rename skopt/{samples => sampler}/hammersly.py (100%) rename skopt/{samples => sampler}/lhs.py (100%) rename skopt/{samples => sampler}/sobol.py (100%) diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index db8405f74..b255dfbb6 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -221,16 +221,16 @@ details. utils.point_aslist utils.use_named_args -.. _samples_ref: +.. _sampler_ref: -:mod:`skopt.samples`: Samples -=============================== +:mod:`skopt.sampler`: Samplers +============================== -.. automodule:: skopt.samples +.. automodule:: skopt.sampler :no-members: :no-inherited-members: -**User guide:** See the :ref:`sample` section for further details. +**User guide:** See the :ref:`sampler` section for further details. .. currentmodule:: skopt @@ -238,10 +238,10 @@ details. :toctree: generated/ :template: class.rst - samples.Lhs - samples.Sobol - samples.Halton - samples.Hammersly + sampler.Lhs + sampler.Sobol + sampler.Halton + sampler.Hammersly .. _space_ref: diff --git a/doc/modules/sampler.rst b/doc/modules/sampler.rst new file mode 100644 index 000000000..eeb151f6d --- /dev/null +++ b/doc/modules/sampler.rst @@ -0,0 +1,6 @@ +.. currentmodule:: skopt.sampler + +.. _sampler: + +Sampling methods +================ diff --git a/doc/modules/samples.rst b/doc/modules/samples.rst deleted file mode 100644 index ab8441cb9..000000000 --- a/doc/modules/samples.rst +++ /dev/null @@ -1,6 +0,0 @@ -.. currentmodule:: skopt.samples - -.. 
_samples: - -Sampling methods -================ diff --git a/doc/themes/scikit-learn-modern/javascript.html b/doc/themes/scikit-learn-modern/javascript.html index f62c657d1..4d9685653 100644 --- a/doc/themes/scikit-learn-modern/javascript.html +++ b/doc/themes/scikit-learn-modern/javascript.html @@ -10,7 +10,7 @@ ' + +cd - +set +o pipefail + +affected_doc_paths() { + files=$(git diff --name-only origin/master...$CIRCLE_SHA1) + echo "$files" | grep ^doc/.*\.rst | sed 's/^doc\/\(.*\)\.rst$/\1.html/' + echo "$files" | grep ^examples/.*.py | sed 's/^\(.*\)\.py$/auto_\1.html/' + skopt_files=$(echo "$files" | grep '^skopt/') + if [ -n "$skopt_files" ] + then + grep -hlR -f<(echo "$skopt_files" | sed 's/^/scikit-optimize\/blob\/[a-z0-9]*\//') doc/_build/html/stable/modules/generated | cut -d/ -f5- + fi +} + +affected_doc_warnings() { + files=$(git diff --name-only origin/master...$CIRCLE_SHA1) + # Look for sphinx warnings only in files affected by the PR + if [ -n "$files" ] + then + for af in ${files[@]} + do + warn+=`grep WARNING ~/log.txt | grep $af` + done + fi + echo "$warn" +} + +if [ -n "$CI_PULL_REQUEST" ] +then + echo "The following documentation warnings may have been generated by PR #$CI_PULL_REQUEST:" + warnings=$(affected_doc_warnings) + if [ -z "$warnings" ] + then + warnings="/home/circleci/project/ no warnings" + fi + echo "$warnings" + + echo "The following documentation files may have been changed by PR #$CI_PULL_REQUEST:" + affected=$(affected_doc_paths) + echo "$affected" + ( + echo '
<html><body><ul>'
+    echo "$affected" | sed 's|.*|<li><a href="&">&</a> [<a href="https://scikit-optimize.github.io/dev/&">dev</a>, <a href="https://scikit-optimize.github.io/stable/&">stable</a>]</li>|'
+    echo '</ul><p>General: <a href="index.html">Home</a> | <a href="modules/classes.html">API Reference</a> | <a href="auto_examples/index.html">Examples</a></p>'
+    echo '<strong>Sphinx Warnings in affected files</strong><ul>'
+    echo "$warnings" | sed 's/\/home\/circleci\/project\//<li>/g'
+    echo '</ul></body></html>
    ' + ) > 'doc/_build/html/stable/_changed.html' + + if [ "$warnings" != "/home/circleci/project/ no warnings" ] + then + echo "Sphinx generated warnings when building the documentation related to files modified in this PR." + echo "Please check doc/_build/html/stable/_changed.html" + exit 1 + fi +fi diff --git a/build_tools/circle/checkout_merge_commit.sh b/build_tools/circle/checkout_merge_commit.sh new file mode 100644 index 000000000..edbc679bd --- /dev/null +++ b/build_tools/circle/checkout_merge_commit.sh @@ -0,0 +1,32 @@ +#!/bin/bash + + +# Add `master` branch to the update list. +# Otherwise CircleCI will give us a cached one. +FETCH_REFS="+master:master" + +# Update PR refs for testing. +if [[ -n "${CIRCLE_PR_NUMBER}" ]] +then + FETCH_REFS="${FETCH_REFS} +refs/pull/${CIRCLE_PR_NUMBER}/head:pr/${CIRCLE_PR_NUMBER}/head" + FETCH_REFS="${FETCH_REFS} +refs/pull/${CIRCLE_PR_NUMBER}/merge:pr/${CIRCLE_PR_NUMBER}/merge" +fi + +# Retrieve the refs. +git fetch -u origin ${FETCH_REFS} + +# Checkout the PR merge ref. +if [[ -n "${CIRCLE_PR_NUMBER}" ]] +then + git checkout -qf "pr/${CIRCLE_PR_NUMBER}/merge" || ( + echo Could not fetch merge commit. >&2 + echo There may be conflicts in merging PR \#${CIRCLE_PR_NUMBER} with master. >&2; + exit 1) +fi + +# Check for merge conflicts. +if [[ -n "${CIRCLE_PR_NUMBER}" ]] +then + git branch --merged | grep master > /dev/null + git branch --merged | grep "pr/${CIRCLE_PR_NUMBER}/head" > /dev/null +fi \ No newline at end of file diff --git a/build_tools/circle/linting.sh b/build_tools/circle/linting.sh new file mode 100644 index 000000000..055d1b696 --- /dev/null +++ b/build_tools/circle/linting.sh @@ -0,0 +1,156 @@ +#!/bin/bash + +# This script is used in CircleCI to check that PRs do not add obvious +# flake8 violations. It relies on two things: +# - find common ancestor between branch and +# scikit-optimize/scikit-optimize remote +# - run flake8 --diff on the diff between the branch and the common +# ancestor +# +# Additional features: +# - the line numbers in Travis match the local branch on the PR +# author machine. +# - ./build_tools/circle/flake8_diff.sh can be run locally for quick +# turn-around + +set -e +# pipefail is necessary to propagate exit codes +set -o pipefail + +PROJECT=scikit-optimize/scikit-optimize +PROJECT_URL=https://github.com/$PROJECT.git + +# Find the remote with the project name (upstream in most cases) +REMOTE=$(git remote -v | grep $PROJECT | cut -f1 | head -1 || echo '') + +# Add a temporary remote if needed. For example this is necessary when +# Travis is configured to run in a fork. In this case 'origin' is the +# fork and not the reference repo we want to diff against. +if [[ -z "$REMOTE" ]]; then + TMP_REMOTE=tmp_reference_upstream + REMOTE=$TMP_REMOTE + git remote add $REMOTE $PROJECT_URL +fi + +echo "Remotes:" +echo '--------------------------------------------------------------------------------' +git remote --verbose + +# Travis does the git clone with a limited depth (50 at the time of +# writing). 
This may not be enough to find the common ancestor with +# $REMOTE/master so we unshallow the git checkout +if [[ -a .git/shallow ]]; then + echo -e '\nTrying to unshallow the repo:' + echo '--------------------------------------------------------------------------------' + git fetch --unshallow +fi + +if [[ "$TRAVIS" == "true" ]]; then + if [[ "$TRAVIS_PULL_REQUEST" == "false" ]] + then + # In main repo, using TRAVIS_COMMIT_RANGE to test the commits + # that were pushed into a branch + if [[ "$PROJECT" == "$TRAVIS_REPO_SLUG" ]]; then + if [[ -z "$TRAVIS_COMMIT_RANGE" ]]; then + echo "New branch, no commit range from Travis so passing this test by convention" + exit 0 + fi + COMMIT_RANGE=$TRAVIS_COMMIT_RANGE + fi + else + # We want to fetch the code as it is in the PR branch and not + # the result of the merge into master. This way line numbers + # reported by Travis will match with the local code. + LOCAL_BRANCH_REF=travis_pr_$TRAVIS_PULL_REQUEST + # In Travis the PR target is always origin + git fetch origin pull/$TRAVIS_PULL_REQUEST/head:refs/$LOCAL_BRANCH_REF + fi +fi + +# If not using the commit range from Travis we need to find the common +# ancestor between $LOCAL_BRANCH_REF and $REMOTE/master +if [[ -z "$COMMIT_RANGE" ]]; then + if [[ -z "$LOCAL_BRANCH_REF" ]]; then + LOCAL_BRANCH_REF=$(git rev-parse --abbrev-ref HEAD) + fi + echo -e "\nLast 2 commits in $LOCAL_BRANCH_REF:" + echo '--------------------------------------------------------------------------------' + git --no-pager log -2 $LOCAL_BRANCH_REF + + REMOTE_MASTER_REF="$REMOTE/master" + # Make sure that $REMOTE_MASTER_REF is a valid reference + echo -e "\nFetching $REMOTE_MASTER_REF" + echo '--------------------------------------------------------------------------------' + git fetch $REMOTE master:refs/remotes/$REMOTE_MASTER_REF + LOCAL_BRANCH_SHORT_HASH=$(git rev-parse --short $LOCAL_BRANCH_REF) + REMOTE_MASTER_SHORT_HASH=$(git rev-parse --short $REMOTE_MASTER_REF) + + COMMIT=$(git merge-base $LOCAL_BRANCH_REF $REMOTE_MASTER_REF) || \ + echo "No common ancestor found for $(git show $LOCAL_BRANCH_REF -q) and $(git show $REMOTE_MASTER_REF -q)" + + if [ -z "$COMMIT" ]; then + exit 1 + fi + + COMMIT_SHORT_HASH=$(git rev-parse --short $COMMIT) + + echo -e "\nCommon ancestor between $LOCAL_BRANCH_REF ($LOCAL_BRANCH_SHORT_HASH)"\ + "and $REMOTE_MASTER_REF ($REMOTE_MASTER_SHORT_HASH) is $COMMIT_SHORT_HASH:" + echo '--------------------------------------------------------------------------------' + git --no-pager show --no-patch $COMMIT_SHORT_HASH + + COMMIT_RANGE="$COMMIT_SHORT_HASH..$LOCAL_BRANCH_SHORT_HASH" + + if [[ -n "$TMP_REMOTE" ]]; then + git remote remove $TMP_REMOTE + fi + +else + echo "Got the commit range from Travis: $COMMIT_RANGE" +fi + +echo -e '\nRunning flake8 on the diff in the range' "$COMMIT_RANGE" \ + "($(git rev-list $COMMIT_RANGE | wc -l) commit(s)):" +echo '--------------------------------------------------------------------------------' + +# We need the following command to exit with 0 hence the echo in case +# there is no match +MODIFIED_FILES="$(git diff --name-only $COMMIT_RANGE | \ + grep -v 'doc/sphinxext' || echo "no_match")" + +check_files() { + files="$1" + shift + options="$*" + if [ -n "$files" ]; then + # Conservative approach: diff without context (--unified=0) so that code + # that was not changed does not create failures + git diff --unified=0 $COMMIT_RANGE -- $files | flake8 --diff --show-source $options + fi +} + +if [[ "$MODIFIED_FILES" == "no_match" ]]; then + echo "No file outside 
doc/sphinxext has been modified" +else + + check_files "$(echo "$MODIFIED_FILES" | grep -v ^examples)" + check_files "$(echo "$MODIFIED_FILES" | grep ^examples)" \ + --config ./examples/.flake8 +fi +echo -e "No problem detected by flake8\n" + +# For docstrings and warnings of deprecated attributes to be rendered +# properly, the property decorator must come before the deprecated decorator +# (else they are treated as functions) + +# do not error when grep -B1 "@property" finds nothing +set +e +bad_deprecation_property_order=`git grep -A 10 "@property" -- "*.py" | awk '/@property/,/def /' | grep -B1 "@deprecated"` + +if [ ! -z "$bad_deprecation_property_order" ] +then + echo "property decorator should come before deprecated decorator" + echo "found the following occurrencies:" + echo $bad_deprecation_property_order + exit 1 +fi \ No newline at end of file diff --git a/build_tools/circle/list_versions.py b/build_tools/circle/list_versions.py new file mode 100644 index 000000000..dcafbfb40 --- /dev/null +++ b/build_tools/circle/list_versions.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python3 + +# List all available versions of the documentation +import json +import re +import sys + +from distutils.version import LooseVersion +from urllib.request import urlopen + +def json_urlread(url): + try: + return json.loads(urlopen(url).read().decode('utf8')) + except Exception: + print('Error reading', url, file=sys.stderr) + raise + + +def human_readable_data_quantity(quantity, multiple=1024): + # https://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size + if quantity == 0: + quantity = +0 + SUFFIXES = ["B"] + [i + {1000: "B", 1024: "iB"}[multiple] + for i in "KMGTPEZY"] + for suffix in SUFFIXES: + if quantity < multiple or suffix == SUFFIXES[-1]: + if suffix == SUFFIXES[0]: + return "%d %s" % (quantity, suffix) + else: + return "%.1f %s" % (quantity, suffix) + else: + quantity /= multiple + + +def get_pdf_size(version): + api_url = ROOT_URL + '%s/_downloads' % version + for path_details in json_urlread(api_url): + if path_details['name'] == 'scikit-optimize-docs.pdf': + return human_readable_data_quantity(path_details['size'], 1000) + + +print(':orphan:') +print() +heading = 'Available documentation for Scikit-optimize' +print(heading) +print('=' * len(heading)) +print() +print('Web-based documentation is available for versions listed below:') +print() + +ROOT_URL = 'https://api.github.com/repos/scikit-optimize/scikit-optimize.github.io/contents/' # noqa +RAW_FMT = 'https://raw.githubusercontent.com/scikit-optimize/scikit-optimize.github.io/master/%s/index.html' # noqa +VERSION_RE = re.compile(r"scikit-optimize ([\w\.\-]+) documentation") +NAMED_DIRS = ['dev', 'stable'] + +# Gather data for each version directory, including symlinks +dirs = {} +symlinks = {} +root_listing = json_urlread(ROOT_URL) +for path_details in root_listing: + name = path_details['name'] + if not (name[:1].isdigit() or name in NAMED_DIRS): + continue + if path_details['type'] == 'dir': + html = urlopen(RAW_FMT % name).read().decode('utf8') + version_num = VERSION_RE.search(html).group(1) + pdf_size = get_pdf_size(name) + dirs[name] = (version_num, pdf_size) + + if path_details['type'] == 'symlink': + symlinks[name] = json_urlread(path_details['_links']['self'])['target'] + + +# Symlinks should have same data as target +for src, dst in symlinks.items(): + if dst in dirs: + dirs[src] = dirs[dst] + +# Output in order: dev, stable, decreasing other version +seen = set() +for name in (NAMED_DIRS 
+ + sorted((k for k in dirs if k[:1].isdigit()), + key=LooseVersion, reverse=True)): + version_num, pdf_size = dirs[name] + if version_num in seen: + # symlink came first + continue + else: + seen.add(version_num) + name_display = '' if name[:1].isdigit() else ' (%s)' % name + path = 'https://scikit-optimize.github.io/%s/' % name + out = ('* `Scikit-optimize %s%s documentation <%s>`_' + % (version_num, name_display, path)) + if pdf_size is not None: + out += (' (`PDF %s <%s/_downloads/scikit-optimize-docs.pdf>`_)' + % (pdf_size, path)) + print(out) \ No newline at end of file diff --git a/build_tools/circle/push_doc.sh b/build_tools/circle/push_doc.sh new file mode 100644 index 000000000..9c6d6cc95 --- /dev/null +++ b/build_tools/circle/push_doc.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# This script is meant to be called in the "deploy" step defined in +# circle.yml. See https://circleci.com/docs/ for more details. +# The behavior of the script is controlled by environment variable defined +# in the circle.yml in the top level folder of the project. + +set -ex + +if [ -z $CIRCLE_PROJECT_USERNAME ]; +then USERNAME="skoptci"; +else USERNAME=$CIRCLE_PROJECT_USERNAME; +fi + +DOC_REPO="scikit-optimize.github.io" +GENERATED_DOC_DIR=$1 + +if [[ -z "$GENERATED_DOC_DIR" ]]; then + echo "Need to pass directory of the generated doc as argument" + echo "Usage: $0 " + exit 1 +fi + +# Absolute path needed because we use cd further down in this script +GENERATED_DOC_DIR=$(readlink -f $GENERATED_DOC_DIR) + +if [ "$CIRCLE_BRANCH" = "master" ] +then + dir=dev +else + # Strip off .X + dir="${CIRCLE_BRANCH::-2}" +fi + +MSG="Pushing the docs to $dir/ for branch: $CIRCLE_BRANCH, commit $CIRCLE_SHA1" + +cd $HOME +if [ ! -d $DOC_REPO ]; +then git clone --depth 1 --no-checkout "git@github.com:scikit-optimize/"$DOC_REPO".git"; +fi +cd $DOC_REPO + +# check if it's a new branch + +echo $dir > .git/info/sparse-checkout +if ! git show HEAD:$dir >/dev/null +then + # directory does not exist. 
Need to make it so sparse checkout works + mkdir $dir + touch $dir/index.html + git add $dir +fi +git checkout master +git reset --hard origin/master +if [ -d $dir ] +then + git rm -rf $dir/ && rm -rf $dir/ +fi +cp -R $GENERATED_DOC_DIR $dir +git config user.email "skoptci@gmail.com" +git config user.name $USERNAME +git config push.default matching +git add -f $dir/ +git commit -m "$MSG" $dir +git push +echo $MSG \ No newline at end of file From 9102864680d6329cb7948af8f87960a1db4ca831 Mon Sep 17 00:00:00 2001 From: holgern Date: Thu, 20 Feb 2020 17:56:36 +0100 Subject: [PATCH 095/265] Fix permission --- build_tools/circle/build_doc.sh | 0 build_tools/circle/checkout_merge_commit.sh | 0 build_tools/circle/linting.sh | 0 build_tools/circle/list_versions.py | 0 build_tools/circle/push_doc.sh | 0 5 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 build_tools/circle/build_doc.sh mode change 100644 => 100755 build_tools/circle/checkout_merge_commit.sh mode change 100644 => 100755 build_tools/circle/linting.sh mode change 100644 => 100755 build_tools/circle/list_versions.py mode change 100644 => 100755 build_tools/circle/push_doc.sh diff --git a/build_tools/circle/build_doc.sh b/build_tools/circle/build_doc.sh old mode 100644 new mode 100755 diff --git a/build_tools/circle/checkout_merge_commit.sh b/build_tools/circle/checkout_merge_commit.sh old mode 100644 new mode 100755 diff --git a/build_tools/circle/linting.sh b/build_tools/circle/linting.sh old mode 100644 new mode 100755 diff --git a/build_tools/circle/list_versions.py b/build_tools/circle/list_versions.py old mode 100644 new mode 100755 diff --git a/build_tools/circle/push_doc.sh b/build_tools/circle/push_doc.sh old mode 100644 new mode 100755 From 7d68b833a05be1b6781b43113ec07b1f5c1c2f94 Mon Sep 17 00:00:00 2001 From: holgern Date: Thu, 20 Feb 2020 17:58:31 +0100 Subject: [PATCH 096/265] Rename script --- build_tools/circle/{list_versions.py => list_versions.sh} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename build_tools/circle/{list_versions.py => list_versions.sh} (100%) diff --git a/build_tools/circle/list_versions.py b/build_tools/circle/list_versions.sh similarity index 100% rename from build_tools/circle/list_versions.py rename to build_tools/circle/list_versions.sh From 5f12a297b7cc3b4c80b7f2db8e96a1950b27de0c Mon Sep 17 00:00:00 2001 From: holgern Date: Thu, 20 Feb 2020 18:09:11 +0100 Subject: [PATCH 097/265] Change doc build --- .circleci/config.yml | 91 +++++++++++++++++++++++++++++++++++++++++--- doc/Makefile | 10 ++--- 2 files changed, 90 insertions(+), 11 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 800d28b40..498d06842 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -30,6 +30,80 @@ jobs: root: ~/doc paths: . 
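The `persist_to_workspace` step above and the `attach_workspace` step in the deploy job further down in this patch are the two halves of CircleCI's workspace mechanism: one job saves a directory tree, and a downstream job re-mounts it. A condensed sketch of how they pair up, using the job names and paths from this config (the comments are annotations, not part of the patch):

    jobs:
      doc:
        steps:
          # ... checkout, run build_doc.sh ...
          - persist_to_workspace:    # save the built docs for later jobs
              root: doc/_build/html
              paths: .
      deploy:
        steps:
          - attach_workspace:        # re-mount what the doc job persisted
              at: doc/_build/html
          # push_doc.sh can now read doc/_build/html/stable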
+ doc-min-dependencies: + docker: + - image: circleci/python:3.7.3-stretch + environment: + - OMP_NUM_THREADS: 2 + - MKL_NUM_THREADS: 2 + - MINICONDA_PATH: ~/miniconda + - CONDA_ENV_NAME: testenv + - PYTHON_VERSION: 3.6 + - NUMPY_VERSION: 1.13.3 + - SKLEARN_VERSION: 0.19.1 + - SCIPY_VERSION: 0.19.1 + - MATPLOTLIB_VERSION: 2.1.1 + # on conda, this is the latest for python 3.5 + # The following places need to be in sync with regard to Cython version: + # - .circleci config file + # - sklearn/_build_utils/__init__.py + # - advanced installation guide + - CYTHON_VERSION: 0.28.5 + - SCIKIT_IMAGE_VERSION: 0.13 + steps: + - checkout + - run: ./build_tools/circle/checkout_merge_commit.sh + - restore_cache: + keys: + - doc-min-deps-ccache-{{ .Branch }} + - doc-min-deps-ccache + - run: ./build_tools/circle/build_doc.sh + - save_cache: + key: doc-min-deps-ccache-{{ .Branch }}-{{ .BuildNum }} + paths: + - ~/.ccache + - ~/.cache/pip + - store_artifacts: + path: doc/_build/html/stable + destination: doc + - store_artifacts: + path: ~/log.txt + destination: log.txt + + doc: + docker: + - image: circleci/python:3.7.3-stretch + environment: + - OMP_NUM_THREADS: 2 + - MKL_NUM_THREADS: 2 + - MINICONDA_PATH: ~/miniconda + - CONDA_ENV_NAME: testenv + - PYTHON_VERSION: 3 + steps: + - checkout + - run: ./build_tools/circle/checkout_merge_commit.sh + - restore_cache: + keys: + - doc-ccache-{{ .Branch }} + - doc-ccache + - run: ./build_tools/circle/build_doc.sh + - save_cache: + key: doc-ccache-{{ .Branch }}-{{ .BuildNum }} + paths: + - ~/.ccache + - ~/.cache/pip + - store_artifacts: + path: doc/_build/html/stable + destination: doc + - store_artifacts: + path: ~/log.txt + destination: log.txt + # Persists generated documentation so that it can be attached and deployed + # in the 'deploy' step. + - persist_to_workspace: + root: doc/_build/html + paths: . + lint: docker: - image: circleci/python:3.6 @@ -66,23 +140,28 @@ jobs: - checkout - attach_workspace: at: ~/doc - - deploy: - command: bash build_tools/circle/deploy.sh + #- deploy: + # command: bash build_tools/circle/deploy.sh workflows: version: 2 build-doc-and-deploy: jobs: - lint - - build: + - doc: requires: - lint - - deploy: + - doc-min-dependencies: requires: - - build + - lint + - pypy3: filters: branches: - only: master + only: + - 0.20.X + - deploy: + requires: + - doc pypy: triggers: - schedule: diff --git a/doc/Makefile b/doc/Makefile index 05ba06142..8ae7c8497 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -44,14 +44,14 @@ html: # the embedding of images more robust rm -rf $(BUILDDIR)/html/_images #rm -rf _build/doctrees/ - $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html/stable @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/html" + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html/stable" html-noplot: - $(SPHINXBUILD) -D plot_gallery=0 -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + $(SPHINXBUILD) -D plot_gallery=0 -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html/stable @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html/stable." 
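As a usage note on the Makefile change above: the Sphinx output now lands in a `stable` subfolder, so a local documentation build looks roughly like this (targets as defined in this Makefile):

    cd doc
    make html          # full build; pages end up in _build/html/stable
    make html-noplot   # faster build that skips executing the example gallery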
dirhtml: $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml @@ -107,4 +107,4 @@ optipng: | xargs -0 -n 1 -P 4 optipng -o10 dist: html latexpdf - cp _build/latex/user_guide.pdf _build/html/_downloads/scikit-optimize-docs.pdf + cp _build/latex/user_guide.pdf _build/html/stable/_downloads/scikit-optimize-docs.pdf From 20afee5ce29274201255dc5c71b98aa08982a85b Mon Sep 17 00:00:00 2001 From: holgern Date: Thu, 20 Feb 2020 20:15:01 +0100 Subject: [PATCH 098/265] Switch to new deploy --- .circleci/config.yml | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 498d06842..c9cb18fdc 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,4 +1,5 @@ version: 2 + jobs: build: docker: @@ -134,14 +135,31 @@ jobs: - ~/.cache/pip deploy: + docker: + - image: circleci/python:3.6 + steps: + - checkout + - run: ./build_tools/circle/checkout_merge_commit.sh + # Attach documentation generated in the 'doc' step so that it can be + # deployed. + - attach_workspace: + at: doc/_build/html + - run: ls -ltrh doc/_build/html/stable + - deploy: + command: | + if [[ "${CIRCLE_BRANCH}" =~ ^master$|^[0-9]+\.[0-9]+\.X$ ]]; then + bash build_tools/circle/push_doc.sh doc/_build/html/stable + fi + + deploy-old: docker: - image: circleci/python:3.6.7 steps: - checkout - attach_workspace: at: ~/doc - #- deploy: - # command: bash build_tools/circle/deploy.sh + deploy: + command: bash build_tools/circle/deploy.sh workflows: version: 2 From 5a38e63b81ba465e535127b91cd4226feeae91f7 Mon Sep 17 00:00:00 2001 From: holgern Date: Thu, 20 Feb 2020 20:17:13 +0100 Subject: [PATCH 099/265] fix yaml --- .circleci/config.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index c9cb18fdc..c068ac73c 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -158,8 +158,8 @@ jobs: - checkout - attach_workspace: at: ~/doc - deploy: - command: bash build_tools/circle/deploy.sh + - deploy: + command: bash build_tools/circle/deploy.sh workflows: version: 2 From ddca7f56ce7a154c2284d6464cf872a1f4992f3d Mon Sep 17 00:00:00 2001 From: holgern Date: Thu, 20 Feb 2020 20:15:15 +0100 Subject: [PATCH 100/265] Revert "Merge remote-tracking branch 'origin/Improve_circle_ci' into Improve_circle_ci" This reverts commit 800e9011a388db9c25a4bceb1c67c8553cc91c85. 
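For reference, the deploy gate introduced in PATCH 098 above only pushes documentation for `master` and release branches of the form `N.N.X`. A small sketch of what that regex accepts, with illustrative branch names:

    # same test as in the deploy step of PATCH 098
    for branch in master 0.8.X 0.20.X my-feature; do
      if [[ "$branch" =~ ^master$|^[0-9]+\.[0-9]+\.X$ ]]; then
        echo "$branch -> push docs"   # matches: master, 0.8.X, 0.20.X
      else
        echo "$branch -> skip"        # my-feature does not match
      fi
    done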
--- .travis.yml | 8 +- build_tools/travis/test_script.sh | 7 +- doc/modules/classes.rst | 1 - doc/modules/transformers.rst | 7 - .../initial-sampling-method-integer.py | 13 +- examples/sampler/initial-sampling-method.py | 13 +- examples/sampler/sampling_comparison.py | 59 ++-- pyproject.toml | 2 +- requirements.txt | 2 +- setup.py | 2 +- skopt/__init__.py | 2 +- skopt/optimizer/base.py | 45 +-- skopt/optimizer/dummy.py | 19 +- skopt/optimizer/forest.py | 35 +-- skopt/optimizer/gbrt.py | 36 +-- skopt/optimizer/gp.py | 35 +-- skopt/optimizer/optimizer.py | 51 +++- skopt/sampler/__init__.py | 5 +- skopt/sampler/base.py | 24 -- skopt/sampler/grid.py | 170 ----------- skopt/sampler/halton.py | 1 - skopt/sampler/lhs.py | 16 +- skopt/tests/test_common.py | 90 +++--- skopt/tests/test_forest_opt.py | 6 +- skopt/tests/test_gp_opt.py | 16 +- skopt/tests/test_sampler.py | 282 ------------------ skopt/tests/test_samples.py | 176 +++++++++++ skopt/utils.py | 61 +--- 28 files changed, 398 insertions(+), 786 deletions(-) delete mode 100644 doc/modules/transformers.rst delete mode 100644 skopt/sampler/grid.py delete mode 100644 skopt/tests/test_sampler.py create mode 100644 skopt/tests/test_samples.py diff --git a/.travis.yml b/.travis.yml index 6c76b4ceb..871afe045 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,25 +22,25 @@ matrix: - name: "Python 3.5 - scikit 0.19.2" python: "3.7" env: DISTRIB="conda" PYTHON_VERSION="3.5" - NUMPY_VERSION="1.12.0" SCIPY_VERSION="0.18.0" PYAML_VERSION="16.9.0" + NUMPY_VERSION="1.11.0" SCIPY_VERSION="0.18.0" PYAML_VERSION="16.9.0" SCIKIT_LEARN_VERSION="0.19.2" MATPLOTLIB_VERSION="2.0.0" COVERAGE="false" JOBLIB_VERSION="0.11" - name: "Python 3.6 - scikit 0.20.4" python: "3.7" env: DISTRIB="conda" PYTHON_VERSION="3.6" - NUMPY_VERSION="1.14.0" SCIPY_VERSION="0.19.1" PYAML_VERSION="16.12.0" + NUMPY_VERSION="1.12.0" SCIPY_VERSION="0.19.1" PYAML_VERSION="16.12.0" SCIKIT_LEARN_VERSION="0.20.4" MATPLOTLIB_VERSION="2.0.0" COVERAGE="false" JOBLIB_VERSION="0.11" - name: "Python 3.7 - scikit 0.21.3" python: "3.7" env: DISTRIB="conda" PYTHON_VERSION="3.7" - NUMPY_VERSION="1.15.0" SCIPY_VERSION="1.0.0" PYAML_VERSION="17.8.0" + NUMPY_VERSION="1.13.0" SCIPY_VERSION="1.0.0" PYAML_VERSION="17.8.0" SCIKIT_LEARN_VERSION="0.21.3" MATPLOTLIB_VERSION="*" COVERAGE="true" JOBLIB_VERSION="0.12" - name: "Python 3.7 - scikit 0.22.1" python: "3.7" env: DISTRIB="conda" PYTHON_VERSION="3.7" - NUMPY_VERSION="1.16.0" SCIPY_VERSION="1.2.0" PYAML_VERSION="18.11.0" + NUMPY_VERSION="1.14.0" SCIPY_VERSION="1.2.0" PYAML_VERSION="18.11.0" SCIKIT_LEARN_VERSION="0.22.1" MATPLOTLIB_VERSION="*" COVERAGE="true" JOBLIB_VERSION="0.13" - name: "Python 3.8 latest package versions" diff --git a/build_tools/travis/test_script.sh b/build_tools/travis/test_script.sh index b5c05b033..a14e50706 100644 --- a/build_tools/travis/test_script.sh +++ b/build_tools/travis/test_script.sh @@ -11,11 +11,10 @@ set -e python --version python -c "import numpy; print('numpy %s' % numpy.__version__)" python -c "import scipy; print('scipy %s' % scipy.__version__)" -python -c "import sklearn; print('sklearn %s' % sklearn.__version__)" python -c "\ try: - import skopt - print('skopt %s' % skopt.__version__) + import sklearn + print('sklearn %s' % sklearn.__version__) except ImportError: pass " @@ -37,7 +36,7 @@ run_tests() { export SKOPT_SKIP_NETWORK_TESTS=1 if [[ "$COVERAGE" == "true" ]]; then - TEST_CMD="$TEST_CMD --cov skopt" + TEST_CMD="$TEST_CMD --cov sklearn" fi if [[ -n "$CHECK_WARNINGS" ]]; then diff --git a/doc/modules/classes.rst 
b/doc/modules/classes.rst index f90ebba73..b255dfbb6 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -212,7 +212,6 @@ details. :template: function.rst utils.cook_estimator - utils.cook_initial_point_generator utils.dimensions_aslist utils.expected_minimum utils.expected_minimum_random_sampling diff --git a/doc/modules/transformers.rst b/doc/modules/transformers.rst deleted file mode 100644 index 34693f287..000000000 --- a/doc/modules/transformers.rst +++ /dev/null @@ -1,7 +0,0 @@ -.. currentmodule:: skopt.space.transformers - -.. _transformers: - -Transformers -============ - diff --git a/examples/sampler/initial-sampling-method-integer.py b/examples/sampler/initial-sampling-method-integer.py index c66a9508b..7430584ba 100644 --- a/examples/sampler/initial-sampling-method-integer.py +++ b/examples/sampler/initial-sampling-method-integer.py @@ -31,7 +31,6 @@ from skopt.sampler import Lhs from skopt.sampler import Halton from skopt.sampler import Hammersly -from skopt.sampler import Grid from scipy.spatial.distance import pdist ############################################################################# @@ -51,6 +50,7 @@ def plot_searchspace(x, title): n_samples = 10 space = Space([(0, 5), (0, 5)]) +space.set_transformer("normalize") ############################################################################# # Random sampling @@ -151,17 +151,6 @@ def plot_searchspace(x, title): pdist_data.append(pdist(x).flatten()) x_label.append("hammersly") -############################################################################# -# Grid sampling -# ------------- - -grid = Grid(border="include", use_full_layout=False) -x = grid.generate(space.dimensions, n_samples) -plot_searchspace(x, 'Grid') -print("empty fields: %d" % (36 - np.size(np.unique(x, axis=0), 0))) -pdist_data.append(pdist(x).flatten()) -x_label.append("grid") - ############################################################################# # Pdist boxplot of all methods # ---------------------------- diff --git a/examples/sampler/initial-sampling-method.py b/examples/sampler/initial-sampling-method.py index ad0e7f903..bbb33e4c5 100644 --- a/examples/sampler/initial-sampling-method.py +++ b/examples/sampler/initial-sampling-method.py @@ -32,7 +32,6 @@ from skopt.sampler import Lhs from skopt.sampler import Halton from skopt.sampler import Hammersly -from skopt.sampler import Grid from scipy.spatial.distance import pdist ############################################################################# @@ -51,7 +50,7 @@ def plot_searchspace(x, title): n_samples = 10 space = Space([(-5., 10.), (0., 15.)]) -# space.set_transformer("normalize") +space.set_transformer("normalize") ############################################################################# # Random sampling @@ -143,16 +142,6 @@ def plot_searchspace(x, title): pdist_data.append(pdist(x).flatten()) x_label.append("hammersly") -############################################################################# -# Grid sampling -# ------------- - -grid = Grid(border="include", use_full_layout=False) -x = grid.generate(space.dimensions, n_samples) -plot_searchspace(x, 'Grid') -pdist_data.append(pdist(x).flatten()) -x_label.append("grid") - ############################################################################# # Pdist boxplot of all methods # ---------------------------- diff --git a/examples/sampler/sampling_comparison.py b/examples/sampler/sampling_comparison.py index 819f5e06d..d027ef366 100644 --- a/examples/sampler/sampling_comparison.py +++ 
b/examples/sampler/sampling_comparison.py @@ -78,20 +78,23 @@ def plot_convergence(result_list, true_minimum=None, yscale=None, title="Converg return ax -def run(minimizer, initial_point_generator, - n_initial_points=10, n_repeats=1): - return [minimizer(func, bounds, n_initial_points=n_initial_points, +def run(minimizer, initial_point_generator, init_point_gen_kwargs, + n_random_starts=10, n_repeats=1): + return [minimizer(func, bounds, n_random_starts=n_random_starts, initial_point_generator=initial_point_generator, + init_point_gen_kwargs=init_point_gen_kwargs, n_calls=n_calls, random_state=n) for n in range(n_repeats)] -def run_measure(initial_point_generator, n_initial_points=10): +def run_measure(initial_point_generator, + n_random_starts=10, + init_point_gen_kwargs=None): start = time.time() # n_repeats must set to a much higher value to obtain meaningful results. n_repeats = 1 - res = run(gp_minimize, initial_point_generator, - n_initial_points=n_initial_points, n_repeats=n_repeats) + res = run(gp_minimize, initial_point_generator, init_point_gen_kwargs, + n_random_starts=n_random_starts, n_repeats=n_repeats) duration = time.time() - start # print("%s %s: %.2f s" % (initial_point_generator, # str(init_point_gen_kwargs), @@ -118,7 +121,7 @@ def run_measure(initial_point_generator, n_initial_points=10): bounds = [(0., 1.), ] * 6 true_minimum = -3.32237 n_calls = 40 - n_initial_points = 10 + n_random_starts = 10 yscale = None title = "Convergence plot - hart6" else: @@ -126,26 +129,25 @@ def run_measure(initial_point_generator, n_initial_points=10): bounds = [(-5.0, 10.0), (0.0, 15.0)] true_minimum = 0.397887 n_calls = 30 - n_initial_points = 10 + n_random_starts = 10 yscale="log" title = "Convergence plot - branin" ############################################################################# -from skopt.utils import cook_initial_point_generator + # Random search -dummy_res = run_measure("random", n_initial_points) -lhs = cook_initial_point_generator( - "lhs", lhs_type="classic", criterion=None) -lhs_res = run_measure(lhs, n_initial_points) -lhs2 = cook_initial_point_generator("lhs", criterion="maximin") -lhs2_res = run_measure(lhs2, n_initial_points) -sobol = cook_initial_point_generator("sobol", randomize=False, - min_skip=1, max_skip=100) -sobol_res = run_measure(sobol, n_initial_points) -halton_res = run_measure("halton", n_initial_points) -hammersly_res = run_measure("hammersly", n_initial_points) -grid_res = run_measure("grid", n_initial_points) +dummy_res = run_measure( "random", n_random_starts) +lhs_res = run_measure("lhs", n_random_starts, + {"lhs_type": "classic", + "criterion": None}) +lhs2_res = run_measure("lhs", n_random_starts, + {"criterion": "maximin"}) +sobol_res = run_measure("sobol", n_random_starts, + {"randomize": False, + "min_skip": 1, "max_skip": 100}) +halton_res = run_measure("halton", n_random_starts) +hammersly_res = run_measure("hammersly", n_random_starts) ############################################################################# # Note that this can take a few minutes. 
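To make the API difference in this revert concrete, the same maximin-LHS setup is written in both styles below. This is only a sketch: `func`, `bounds` and `n_calls` stand for the variables defined earlier in this example script, and the two calls target the pre- and post-revert signatures respectively, so they do not run against a single version of skopt.

    from skopt import gp_minimize
    from skopt.utils import cook_initial_point_generator

    # style this revert removes: a pre-configured generator object
    lhs = cook_initial_point_generator("lhs", criterion="maximin")
    res = gp_minimize(func, bounds, n_calls=n_calls, n_initial_points=10,
                      initial_point_generator=lhs)

    # style this revert restores: a generator name plus a kwargs dict
    res = gp_minimize(func, bounds, n_calls=n_calls, n_random_starts=10,
                      initial_point_generator="lhs",
                      init_point_gen_kwargs={"criterion": "maximin"})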
@@ -155,8 +157,7 @@ def run_measure(initial_point_generator, n_initial_points=10): ("lhs_maximin", lhs2_res), ("sobol", sobol_res), ("halton", halton_res), - ("hammersly", hammersly_res), - ("grid", grid_res)], + ("hammersly", hammersly_res)], true_minimum=true_minimum, yscale=yscale, title=title) @@ -171,10 +172,12 @@ def run_measure(initial_point_generator, n_initial_points=10): ############################################################################# # Test with different n_random_starts values -lhs2 = cook_initial_point_generator("lhs", criterion="maximin") -lhs2_15_res = run_measure(lhs2, 12) -lhs2_20_res = run_measure(lhs2, 14) -lhs2_25_res = run_measure(lhs2, 16) +lhs2_15_res = run_measure("lhs", 12, + {"criterion": "maximin"}) +lhs2_20_res = run_measure("lhs", 14, + {"criterion": "maximin"}) +lhs2_25_res = run_measure("lhs", 16, + {"criterion": "maximin"}) ############################################################################# # n_random_starts = 10 produces the best results @@ -188,4 +191,4 @@ def run_measure(initial_point_generator, n_initial_points=10): yscale=yscale, title=title) -plt.show() +plt.show() \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 6fad80b64..6e5347cb2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ requires = [ "setuptools", "wheel", - "numpy>=1.12", + "numpy>=1.11", "scipy>=0.18", "scikit-learn>=0.19.1", "pyaml>=16.9", diff --git a/requirements.txt b/requirements.txt index 5f2ef0417..18985117d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -numpy>=1.12 +numpy>=1.11 scipy>=0.18 scikit-learn>=0.19 matplotlib>=2.0.0 diff --git a/setup.py b/setup.py index 3b311924e..981892675 100644 --- a/setup.py +++ b/setup.py @@ -43,7 +43,7 @@ classifiers=CLASSIFIERS, packages=['skopt', 'skopt.learning', 'skopt.optimizer', 'skopt.space', 'skopt.learning.gaussian_process', 'skopt.sampler'], - install_requires=['joblib>=0.11', 'pyaml>=16.9', 'numpy>=1.12.0', + install_requires=['joblib>=0.11', 'pyaml>=16.9', 'numpy>=1.11.0', 'scipy>=0.18.0', 'scikit-learn>=0.19.1'], extras_require={ diff --git a/skopt/__init__.py b/skopt/__init__.py index 1239ffa5d..33c2ade38 100644 --- a/skopt/__init__.py +++ b/skopt/__init__.py @@ -65,7 +65,7 @@ "learning", "optimizer", "plots", - "sampler", + "samples", "space", "gp_minimize", "dummy_minimize", diff --git a/skopt/optimizer/base.py b/skopt/optimizer/base.py index d0784ac1c..259dc5076 100644 --- a/skopt/optimizer/base.py +++ b/skopt/optimizer/base.py @@ -6,7 +6,6 @@ import copy import inspect -import warnings import numbers try: from collections.abc import Iterable @@ -22,13 +21,13 @@ def base_minimize(func, dimensions, base_estimator, - n_calls=100, n_random_starts=None, - n_initial_points=10, + n_calls=100, n_random_starts=10, initial_point_generator="random", acq_func="EI", acq_optimizer="lbfgs", x0=None, y0=None, random_state=None, verbose=False, callback=None, n_points=10000, n_restarts_optimizer=5, - xi=0.01, kappa=1.96, n_jobs=1, model_queue_size=None): + xi=0.01, kappa=1.96, n_jobs=1, model_queue_size=None, + init_point_gen_kwargs=None): """Base optimizer class Parameters ---------- @@ -37,7 +36,7 @@ def base_minimize(func, dimensions, base_estimator, and return the objective value. If you have a search-space where all dimensions have names, - then you can use :func:`skopt.utils.use_named_args` as a decorator + then you can use `skopt.utils.use_named_args` as a decorator on your objective function, in order to call it directly with the named arguments. 
See `use_named_args` for an example. @@ -69,16 +68,9 @@ def base_minimize(func, dimensions, base_estimator, n_random_starts : int, default=10 Number of evaluations of `func` with random points before approximating it with `base_estimator`. - .. deprecated:: 0.9 - use `n_initial_points` instead. - n_initial_points : int, default=10 - Number of evaluations of `func` with initialization points - before approximating it with `base_estimator`. Initial point - generator can be changed by setting `initial_point_generator`. - - initial_point_generator : str, InitialPointGenerator instance, \ - default='random' + initial_point_generator : str, InitialPointGenerator instance, + default='random' Sets a initial points generator. Can be either - "random" for uniform random numbers, @@ -86,7 +78,6 @@ def base_minimize(func, dimensions, base_estimator, - "halton" for a Halton sequence, - "hammersly" for a Hammersly sequence, - "lhs" for a latin hypercube sequence, - - "grid" for a uniform grid sequence acq_func : string, default=`"EI"` Function to minimize over the posterior distribution. Can be either @@ -184,6 +175,9 @@ def base_minimize(func, dimensions, base_estimator, Keeps list of models only as long as the argument given. In the case of None, the list has no capped length. + init_point_gen_kwargs : dict + Additional arguments to be passed to the initial_point_generator + Returns ------- res : `OptimizeResult`, scipy object @@ -229,28 +223,20 @@ def base_minimize(func, dimensions, base_estimator, x0 = [x0] if not isinstance(x0, list): raise ValueError("`x0` should be a list, but got %s" % type(x0)) - - # Check `n_random_starts` deprecation first - if n_random_starts is not None: - warnings.warn(("n_random_starts will be removed in favour of " - "n_initial_points. 
It overwrites n_initial_points."), - DeprecationWarning) - n_initial_points = n_random_starts - - if n_initial_points <= 0 and not x0: - raise ValueError("Either set `n_initial_points` > 0," + if n_random_starts <= 0 and not x0: + raise ValueError("Either set `n_random_starts` > 0," " or provide `x0`") # check y0: list-like, requirement of maximal calls if isinstance(y0, Iterable): y0 = list(y0) elif isinstance(y0, numbers.Number): y0 = [y0] - required_calls = n_initial_points + (len(x0) if not y0 else 0) + required_calls = n_random_starts + (len(x0) if not y0 else 0) if n_calls < required_calls: raise ValueError( "Expected `n_calls` >= %d, got %d" % (required_calls, n_calls)) # calculate the total number of initial points - n_initial_points = n_initial_points + len(x0) + n_initial_points = n_random_starts + len(x0) # Build optimizer @@ -262,7 +248,8 @@ def base_minimize(func, dimensions, base_estimator, random_state=random_state, model_queue_size=model_queue_size, acq_optimizer_kwargs=acq_optimizer_kwargs, - acq_func_kwargs=acq_func_kwargs) + acq_func_kwargs=acq_func_kwargs, + init_point_gen_kwargs=init_point_gen_kwargs) # check x0: element-wise data type, dimensionality assert all(isinstance(p, Iterable) for p in x0) if not all(len(p) == optimizer.space.n_dims for p in x0): @@ -273,7 +260,7 @@ def base_minimize(func, dimensions, base_estimator, if verbose: callbacks.append(VerboseCallback( n_init=len(x0) if not y0 else 0, - n_random=n_initial_points, + n_random=n_random_starts, n_total=n_calls)) # Record provided points diff --git a/skopt/optimizer/dummy.py b/skopt/optimizer/dummy.py index 1e432d1de..6bf43caa9 100644 --- a/skopt/optimizer/dummy.py +++ b/skopt/optimizer/dummy.py @@ -16,7 +16,7 @@ def dummy_minimize(func, dimensions, n_calls=100, and return the objective value. If you have a search-space where all dimensions have names, - then you can use :func:`skopt.utils.use_named_args` as a decorator + then you can use `skopt.utils.use_named_args` as a decorator on your objective function, in order to call it directly with the named arguments. See `use_named_args` for an example. @@ -35,8 +35,8 @@ def dummy_minimize(func, dimensions, n_calls=100, n_calls : int, default=100 Number of calls to `func` to find the minimum. - initial_point_generator : str, InitialPointGenerator instance, \ - default='random' + initial_point_generator : str, InitialPointGenerator instance, + default='random' Sets a initial points generator. Can be either - "random" for uniform random numbers, @@ -44,7 +44,6 @@ def dummy_minimize(func, dimensions, n_calls=100, - "halton" for a Halton sequence, - "hammersly" for a Hammersly sequence, - "lhs" for a latin hypercube sequence, - - "grid" for a uniform grid sequence x0 : list, list of lists or `None` Initial input points. @@ -80,6 +79,9 @@ def dummy_minimize(func, dimensions, n_calls=100, Keeps list of models only as long as the argument given. In the case of None, the list has no capped length. 
+ init_point_gen_kwargs : dict + Additional arguments to be passed to the initial_point_generator + Returns ------- res : `OptimizeResult`, scipy object @@ -108,16 +110,17 @@ def dummy_minimize(func, dimensions, n_calls=100, # all our calls want random suggestions, except if we need to evaluate # some initial points if x0 is not None and y0 is None: - n_initial_points = n_calls - len(x0) + n_random_calls = n_calls - len(x0) else: - n_initial_points = n_calls + n_random_calls = n_calls return base_minimize(func, dimensions, base_estimator="dummy", # explicitly set optimizer to sampling as "dummy" # minimizer does not provide gradients. acq_optimizer="sampling", - n_calls=n_calls, n_initial_points=n_initial_points, + n_calls=n_calls, n_random_starts=n_random_calls, initial_point_generator=initial_point_generator, x0=x0, y0=y0, random_state=random_state, verbose=verbose, - callback=callback, model_queue_size=model_queue_size) + callback=callback, model_queue_size=model_queue_size, + init_point_gen_kwargs=init_point_gen_kwargs) diff --git a/skopt/optimizer/forest.py b/skopt/optimizer/forest.py index b734e3589..ad51ef72a 100644 --- a/skopt/optimizer/forest.py +++ b/skopt/optimizer/forest.py @@ -8,11 +8,12 @@ def forest_minimize(func, dimensions, base_estimator="ET", n_calls=100, - n_random_starts=None, n_initial_points=10, acq_func="EI", + n_random_starts=10, acq_func="EI", initial_point_generator="random", x0=None, y0=None, random_state=None, verbose=False, callback=None, n_points=10000, xi=0.01, kappa=1.96, - n_jobs=1, model_queue_size=None): + n_jobs=1, model_queue_size=None, + init_point_gen_kwargs=None): """Sequential optimisation using decision trees. A tree based regression model is used to model the expensive to evaluate @@ -22,16 +23,13 @@ def forest_minimize(func, dimensions, base_estimator="ET", n_calls=100, The total number of evaluations, `n_calls`, are performed like the following. If `x0` is provided but not `y0`, then the elements of `x0` - are first evaluated, followed by `n_initial_points` evaluations. - Finally, `n_calls - len(x0) - n_initial_points` evaluations are + are first evaluated, followed by `n_random_starts` evaluations. + Finally, `n_calls - len(x0) - n_random_starts` evaluations are made guided by the surrogate model. If `x0` and `y0` are both - provided then `n_initial_points` evaluations are first made then - `n_calls - n_initial_points` subsequent evaluations are made + provided then `n_random_starts` evaluations are first made then + `n_calls - n_random_starts` subsequent evaluations are made guided by the surrogate model. - The first `n_initial_points` are generated by the - `initial_point_generator`. - Parameters ---------- func : callable @@ -78,16 +76,9 @@ def forest_minimize(func, dimensions, base_estimator="ET", n_calls=100, n_random_starts : int, default=10 Number of evaluations of `func` with random points before approximating it with `base_estimator`. - .. deprecated:: 0.9 - use `n_initial_points` instead. - - n_initial_points : int, default=10 - Number of evaluations of `func` with initialization points - before approximating it with `base_estimator`. Initial point - generator can be changed by setting `initial_point_generator`. - initial_point_generator : str, InitialPointGenerator instance, \ - default='random' + initial_point_generator : str, InitialPointGenerator instance, + default='random' Sets a initial points generator. 
Can be either - "random" for uniform random numbers, @@ -95,7 +86,6 @@ def forest_minimize(func, dimensions, base_estimator="ET", n_calls=100, - "halton" for a Halton sequence, - "hammersly" for a Hammersly sequence, - "lhs" for a latin hypercube sequence, - - "grid" for a uniform grid sequence acq_func : string, default="LCB" Function to minimize over the forest posterior. Can be either @@ -161,6 +151,9 @@ def forest_minimize(func, dimensions, base_estimator="ET", n_calls=100, Keeps list of models only as long as the argument given. In the case of None, the list has no capped length. + init_point_gen_kwargs : dict + Additional arguments to be passed to the initial_point_generator + Returns ------- res : `OptimizeResult`, scipy object @@ -191,10 +184,10 @@ def forest_minimize(func, dimensions, base_estimator="ET", n_calls=100, return base_minimize(func, dimensions, base_estimator, n_calls=n_calls, n_points=n_points, n_random_starts=n_random_starts, - n_initial_points=n_initial_points, initial_point_generator=initial_point_generator, x0=x0, y0=y0, random_state=random_state, acq_func=acq_func, xi=xi, kappa=kappa, verbose=verbose, callback=callback, acq_optimizer="sampling", - model_queue_size=model_queue_size) + model_queue_size=model_queue_size, + init_point_gen_kwargs=init_point_gen_kwargs) diff --git a/skopt/optimizer/gbrt.py b/skopt/optimizer/gbrt.py index 2196a4d5c..18f51ce94 100644 --- a/skopt/optimizer/gbrt.py +++ b/skopt/optimizer/gbrt.py @@ -6,13 +6,13 @@ def gbrt_minimize(func, dimensions, base_estimator=None, - n_calls=100, n_random_starts=None, - n_initial_points=10, + n_calls=100, n_random_starts=10, initial_point_generator="random", acq_func="EI", acq_optimizer="auto", x0=None, y0=None, random_state=None, verbose=False, callback=None, n_points=10000, xi=0.01, kappa=1.96, - n_jobs=1, model_queue_size=None): + n_jobs=1, model_queue_size=None, + init_point_gen_kwargs=None): """Sequential optimization using gradient boosted trees. Gradient boosted regression trees are used to model the (very) @@ -23,16 +23,13 @@ def gbrt_minimize(func, dimensions, base_estimator=None, The total number of evaluations, `n_calls`, are performed like the following. If `x0` is provided but not `y0`, then the elements of `x0` - are first evaluated, followed by `n_initial_points` evaluations. - Finally, `n_calls - len(x0) - n_initial_points` evaluations are + are first evaluated, followed by `n_random_starts` evaluations. + Finally, `n_calls - len(x0) - n_random_starts` evaluations are made guided by the surrogate model. If `x0` and `y0` are both - provided then `n_initial_points` evaluations are first made then - `n_calls - n_initial_points` subsequent evaluations are made + provided then `n_random_starts` evaluations are first made then + `n_calls - n_random_starts` subsequent evaluations are made guided by the surrogate model. - The first `n_initial_points` are generated by the - `initial_point_generator`. - Parameters ---------- func : callable @@ -65,16 +62,9 @@ def gbrt_minimize(func, dimensions, base_estimator=None, n_random_starts : int, default=10 Number of evaluations of `func` with random points before approximating it with `base_estimator`. - .. deprecated:: 0.9 - use `n_initial_points` instead. - - n_initial_points : int, default=10 - Number of evaluations of `func` with initialization points - before approximating it with `base_estimator`. Initial point - generator can be changed by setting `initial_point_generator`. 
- initial_point_generator : str, InitialPointGenerator instance, \ - default='random' + initial_point_generator : str, InitialPointGenerator instance, + default='random' Sets a initial points generator. Can be either - "random" for uniform random numbers, @@ -82,7 +72,6 @@ def gbrt_minimize(func, dimensions, base_estimator=None, - "halton" for a Halton sequence, - "hammersly" for a Hammersly sequence, - "lhs" for a latin hypercube sequence, - - "grid" for a uniform grid sequence acq_func : string, default=`"LCB"` Function to minimize over the forest posterior. Can be either @@ -146,6 +135,9 @@ def gbrt_minimize(func, dimensions, base_estimator=None, Keeps list of models only as long as the argument given. In the case of None, the list has no capped length. + init_point_gen_kwargs : dict + Additional arguments to be passed to the initial_point_generator + Returns ------- res : `OptimizeResult`, scipy object @@ -182,9 +174,9 @@ def gbrt_minimize(func, dimensions, base_estimator=None, return base_minimize(func, dimensions, base_estimator, n_calls=n_calls, n_points=n_points, n_random_starts=n_random_starts, - n_initial_points=n_initial_points, initial_point_generator=initial_point_generator, x0=x0, y0=y0, random_state=random_state, xi=xi, kappa=kappa, acq_func=acq_func, verbose=verbose, callback=callback, acq_optimizer="sampling", - model_queue_size=model_queue_size) + model_queue_size=model_queue_size, + init_point_gen_kwargs=init_point_gen_kwargs) diff --git a/skopt/optimizer/gp.py b/skopt/optimizer/gp.py index dac3e3e9f..a6091578d 100644 --- a/skopt/optimizer/gp.py +++ b/skopt/optimizer/gp.py @@ -10,13 +10,13 @@ def gp_minimize(func, dimensions, base_estimator=None, - n_calls=100, n_random_starts=None, - n_initial_points=10, + n_calls=100, n_random_starts=10, initial_point_generator="random", acq_func="gp_hedge", acq_optimizer="auto", x0=None, y0=None, random_state=None, verbose=False, callback=None, n_points=10000, n_restarts_optimizer=5, xi=0.01, kappa=1.96, - noise="gaussian", n_jobs=1, model_queue_size=None): + noise="gaussian", n_jobs=1, model_queue_size=None, + init_point_gen_kwargs=None): """Bayesian optimization using Gaussian Processes. If every function evaluation is expensive, for instance @@ -34,16 +34,13 @@ def gp_minimize(func, dimensions, base_estimator=None, The total number of evaluations, `n_calls`, are performed like the following. If `x0` is provided but not `y0`, then the elements of `x0` - are first evaluated, followed by `n_initial_points` evaluations. - Finally, `n_calls - len(x0) - n_initial_points` evaluations are + are first evaluated, followed by `n_random_starts` evaluations. + Finally, `n_calls - len(x0) - n_random_starts` evaluations are made guided by the surrogate model. If `x0` and `y0` are both - provided then `n_initial_points` evaluations are first made then - `n_calls - n_initial_points` subsequent evaluations are made + provided then `n_random_starts` evaluations are first made then + `n_calls - n_random_starts` subsequent evaluations are made guided by the surrogate model. - The first `n_initial_points` are generated by the - `initial_point_generator`. - Parameters ---------- func : callable @@ -91,16 +88,9 @@ def gp_minimize(func, dimensions, base_estimator=None, n_random_starts : int, default=10 Number of evaluations of `func` with random points before approximating it with `base_estimator`. - .. deprecated:: 0.9 - use `n_initial_points` instead. 
- - n_initial_points : int, default=10 - Number of evaluations of `func` with initialization points - before approximating it with `base_estimator`. Initial point - generator can be changed by setting `initial_point_generator`. - initial_point_generator : str, InitialPointGenerator instance, \ - default='random' + initial_point_generator : str, InitialPointGenerator instance, + default='random' Sets a initial points generator. Can be either - "random" for uniform random numbers, @@ -244,6 +234,9 @@ def gp_minimize(func, dimensions, base_estimator=None, Keeps list of models only as long as the argument given. In the case of None, the list has no capped length. + init_point_gen_kwargs : dict + Additional arguments to be passed to the initial_point_generator + Returns ------- res : `OptimizeResult`, scipy object @@ -288,8 +281,8 @@ def gp_minimize(func, dimensions, base_estimator=None, acq_func=acq_func, xi=xi, kappa=kappa, acq_optimizer=acq_optimizer, n_calls=n_calls, n_points=n_points, n_random_starts=n_random_starts, - n_initial_points=n_initial_points, initial_point_generator=initial_point_generator, n_restarts_optimizer=n_restarts_optimizer, x0=x0, y0=y0, random_state=rng, verbose=verbose, - callback=callback, n_jobs=n_jobs, model_queue_size=model_queue_size) + callback=callback, n_jobs=n_jobs, model_queue_size=model_queue_size, + init_point_gen_kwargs=init_point_gen_kwargs) diff --git a/skopt/optimizer/optimizer.py b/skopt/optimizer/optimizer.py index bdd929f1e..97bb481be 100644 --- a/skopt/optimizer/optimizer.py +++ b/skopt/optimizer/optimizer.py @@ -16,6 +16,7 @@ from ..acquisition import _gaussian_acquisition from ..acquisition import gaussian_acquisition_1D from ..learning import GaussianProcessRegressor +from ..sampler import Sobol, Lhs, Hammersly, Halton from ..space import Categorical from ..space import Space from ..utils import check_x_in_space @@ -25,7 +26,6 @@ from ..utils import is_listlike from ..utils import is_2Dlistlike from ..utils import normalize_dimensions -from ..utils import cook_initial_point_generator class Optimizer(object): @@ -52,8 +52,8 @@ class Optimizer(object): - an instance of a `Dimension` object (`Real`, `Integer` or `Categorical`). - base_estimator : `"GP"`, `"RF"`, `"ET"`, `"GBRT"` or sklearn regressor, \ - default=`"GP"` + base_estimator : `"GP"`, `"RF"`, `"ET"`, `"GBRT"` or sklearn regressor, + default=`"GP"` Should inherit from :obj:`sklearn.base.RegressorMixin`. In addition the `predict` method, should have an optional `return_std` argument, which returns `std(Y | x)`` along with `E[Y | x]`. @@ -62,7 +62,7 @@ class Optimizer(object): is used in the minimize functions. n_random_starts : int, default=10 - .. deprecated:: 0.9 + .. deprecated:: use `n_initial_points` instead. n_initial_points : int, default=10 @@ -70,16 +70,15 @@ class Optimizer(object): before approximating it with `base_estimator`. Initial point generator can be changed by setting `initial_point_generator`. - initial_point_generator : str, InitialPointGenerator instance, \ - default='random' + initial_point_generator : str, InitialPointGenerator instance, + default='random' Sets a initial points generator. Can be either - "random" for uniform random numbers, - "sobol" for a Sobol sequence, - "halton" for a Halton sequence, - "hammersly" for a Hammersly sequence, - - "lhs" for a latin hypercube sequence, - - "grid" for a uniform grid sequence + - "lhs" for a latin hypercube sequence acq_func : string, default=`"gp_hedge"` Function to minimize over the posterior distribution. 
Can be either @@ -132,6 +131,9 @@ class Optimizer(object): acq_optimizer_kwargs : dict Additional arguments to be passed to the acquistion optimizer. + init_point_gen_kwargs : dict + Additional arguments to be passed to the initial_point_generator + model_queue_size : int or None, default=None Keeps list of models only as long as the argument given. In the case of None, the list has no capped length. @@ -159,7 +161,8 @@ def __init__(self, dimensions, base_estimator="gp", random_state=None, model_queue_size=None, acq_func_kwargs=None, - acq_optimizer_kwargs=None): + acq_optimizer_kwargs=None, + init_point_gen_kwargs=None): self.rng = check_random_state(random_state) @@ -259,10 +262,29 @@ def __init__(self, dimensions, base_estimator="gp", self.space = Space(dimensions) self._initial_samples = None - self._initial_point_generator = cook_initial_point_generator( - initial_point_generator) - - if self._initial_point_generator is not None: + self._initial_point_generator = initial_point_generator + if init_point_gen_kwargs is None: + init_point_gen_kwargs = dict() + self.init_point_gen_kwargs = init_point_gen_kwargs + if initial_point_generator != "random" and \ + isinstance(initial_point_generator, str): + if initial_point_generator == "sobol": + self._initial_point_generator = Sobol( + **self.init_point_gen_kwargs) + elif initial_point_generator == "halton": + self._initial_point_generator = Halton( + **self.init_point_gen_kwargs) + elif initial_point_generator == "hammersly": + self._initial_point_generator = Hammersly( + **self.init_point_gen_kwargs) + elif initial_point_generator == "lhs": + self._initial_point_generator = Lhs( + **self.init_point_gen_kwargs) + else: + raise ValueError( + "Unkown initial_point_generator: " + + str(initial_point_generator) + ) transformer = self.space.get_transformer() self._initial_samples = self._initial_point_generator.generate( self.space.dimensions, n_initial_points, @@ -311,7 +333,8 @@ def copy(self, random_state=None): acq_optimizer=self.acq_optimizer, acq_func_kwargs=self.acq_func_kwargs, acq_optimizer_kwargs=self.acq_optimizer_kwargs, - random_state=random_state + init_point_gen_kwargs=self.init_point_gen_kwargs, + random_state=random_state, ) optimizer._initial_samples = self._initial_samples if hasattr(self, "gains_"): diff --git a/skopt/sampler/__init__.py b/skopt/sampler/__init__.py index 61224ac69..c8c67693c 100644 --- a/skopt/sampler/__init__.py +++ b/skopt/sampler/__init__.py @@ -5,12 +5,9 @@ from .sobol import Sobol from .halton import Halton from .hammersly import Hammersly -from .grid import Grid -from .base import InitialPointGenerator __all__ = [ "Lhs", "Sobol", - "Halton", "Hammersly", - "Grid", "InitialPointGenerator" + "Halton", "Hammersly" ] diff --git a/skopt/sampler/base.py b/skopt/sampler/base.py index 39dc6af5c..7811328c3 100644 --- a/skopt/sampler/base.py +++ b/skopt/sampler/base.py @@ -1,28 +1,4 @@ -from collections import defaultdict - - class InitialPointGenerator(object): def generate(self, dimensions, n_samples, random_state=None): raise NotImplemented - - def set_params(self, **params): - """ - Set the parameters of this initial point generator. - - Parameters - ---------- - **params : dict - Generator parameters. - Returns - ------- - self : object - Generator instance. 
- """ - if not params: - # Simple optimization to gain speed (inspect is slow) - return self - for key, value in params.items(): - setattr(self, key, value) - - return self diff --git a/skopt/sampler/grid.py b/skopt/sampler/grid.py deleted file mode 100644 index 2e90d7a2e..000000000 --- a/skopt/sampler/grid.py +++ /dev/null @@ -1,170 +0,0 @@ -""" -Inspired by https://github.com/jonathf/chaospy/blob/master/chaospy/ -distributions/sampler/sequences/grid.py -""" -import numpy as np -from .base import InitialPointGenerator -from ..space import Space -from sklearn.utils import check_random_state - - -def _quadrature_combine(args): - args = [np.asarray(arg).reshape(len(arg), -1) for arg in args] - shapes = [arg.shape for arg in args] - - size = np.prod(shapes, 0)[0] * np.sum(shapes, 0)[1] - if size > 10 ** 9: - raise MemoryError("Too large sets") - - out = args[0] - for arg in args[1:]: - out = np.hstack([ - np.tile(out, len(arg)).reshape(-1, out.shape[1]), - np.tile(arg.T, len(out)).reshape(arg.shape[1], -1).T, - ]) - return out - - -def _create_uniform_grid_exclude_border(n_dim, order): - assert order > 0 - assert n_dim > 0 - x_data = np.arange(1, order + 1) / (order + 1.) - x_data = _quadrature_combine([x_data] * n_dim) - return x_data - - -def _create_uniform_grid_include_border(n_dim, order): - assert order > 1 - assert n_dim > 0 - x_data = np.arange(0, order) / (order - 1.) - x_data = _quadrature_combine([x_data] * n_dim) - return x_data - - -def _create_uniform_grid_only_border(n_dim, order): - assert n_dim > 0 - assert order > 1 - x = [[0., 1.]] * (n_dim - 1) - x.append(list(np.arange(0, order) / (order - 1.))) - x_data = _quadrature_combine(x) - return x_data - - -class Grid(InitialPointGenerator): - """Generate samples from a regular grid. - - Parameters - ---------- - border : str, default='exclude' - defines how the samples are generated: - - 'include' : Includes the border into the grid layout - - 'exclude' : Excludes the border from the grid layout - - 'only' : Selects only points at the border of the dimension - use_full_layout : boolean, default=True - When True, a full factorial design is generated and - missing points are taken from the next larger full factorial - design, depending on `append_border` - When False, the next larger full factorial design is - generated and points are randomly selected from it. - append_border : str, default="only" - When use_full_layout is True, this parameter defines how the missing - points will be generated from the next larger grid layout: - - 'include' : Includes the border into the grid layout - - 'exclude' : Excludes the border from the grid layout - - 'only' : Selects only points at the border of the dimension - """ - - def __init__(self, border="exclude", use_full_layout=True, - append_border="only"): - self.border = border - self.use_full_layout = use_full_layout - self.append_border = append_border - - def generate(self, dimensions, n_samples, random_state=None): - """Creates samples from a regular grid. - - Parameters - ---------- - dimensions : list, shape (n_dims,) - List of search space dimensions. - Each search dimension can be defined either as - - - a `(lower_bound, upper_bound)` tuple (for `Real` or `Integer` - dimensions), - - a `(lower_bound, upper_bound, "prior")` tuple (for `Real` - dimensions), - - as a list of categories (for `Categorical` dimensions), or - - an instance of a `Dimension` object (`Real`, `Integer` or - `Categorical`). - - n_samples : int - The order of the Halton sequence. Defines the number of samples. 
- random_state : int, RandomState instance, or None (default) - Set random state to something other than None for reproducible - results. - - Returns - ------- - np.array, shape=(n_dim, n_samples) - grid set - """ - rng = check_random_state(random_state) - space = Space(dimensions) - n_dim = space.n_dims - transformer = space.get_transformer() - space.set_transformer("normalize") - - if self.border == "include": - if self.use_full_layout: - order = int(np.floor(np.sqrt(n_samples))) - else: - order = int(np.ceil(np.sqrt(n_samples))) - if order < 2: - order = 2 - h = _create_uniform_grid_include_border(n_dim, order) - elif self.border == "exclude": - if self.use_full_layout: - order = int(np.floor(np.sqrt(n_samples))) - else: - order = int(np.ceil(np.sqrt(n_samples))) - if order < 1: - order = 1 - h = _create_uniform_grid_exclude_border(n_dim, order) - elif self.border == "only": - if self.use_full_layout: - order = int(np.floor(n_samples / 2.)) - else: - order = int(np.ceil(n_samples / 2.)) - if order < 2: - order = 2 - h = _create_uniform_grid_exclude_border(n_dim, order) - else: - raise ValueError("Wrong value for border") - if np.size(h, 0) > n_samples: - rng.shuffle(h) - h = h[:n_samples, :] - elif np.size(h, 0) < n_samples: - if self.append_border == "only": - order = int(np.ceil((n_samples - np.size(h, 0)) / 2.)) - if order < 2: - order = 2 - h2 = _create_uniform_grid_only_border(n_dim, order) - elif self.append_border == "include": - order = int(np.ceil(np.sqrt(n_samples - np.size(h, 0)))) - if order < 2: - order = 2 - h2 = _create_uniform_grid_include_border(n_dim, order) - elif self.append_border == "exclude": - order = int(np.ceil(np.sqrt(n_samples - np.size(h, 0)))) - if order < 1: - order = 1 - h2 = _create_uniform_grid_exclude_border(n_dim, order) - else: - raise ValueError("Wrong value for append_border") - h = np.vstack((h, h2[:(n_samples - np.size(h, 0))])) - rng.shuffle(h) - else: - rng.shuffle(h) - h = space.inverse_transform(h) - space.set_transformer(transformer) - return h diff --git a/skopt/sampler/halton.py b/skopt/sampler/halton.py index 55b48ffb6..e8ea9fc8d 100644 --- a/skopt/sampler/halton.py +++ b/skopt/sampler/halton.py @@ -58,7 +58,6 @@ def generate(self, dimensions, n_samples, random_state=None): random_state : int, RandomState instance, or None (default) Set random state to something other than None for reproducible results. 
- Returns ------- np.array, shape=(n_dim, n_samples) diff --git a/skopt/sampler/lhs.py b/skopt/sampler/lhs.py index d63a15bb3..416a90770 100644 --- a/skopt/sampler/lhs.py +++ b/skopt/sampler/lhs.py @@ -6,20 +6,11 @@ import numpy as np from sklearn.utils import check_random_state from scipy import spatial +from ..utils import random_permute_matrix from ..space import Space, Categorical from .base import InitialPointGenerator -def _random_permute_matrix(h, random_state=None): - rng = check_random_state(random_state) - h_rand_perm = np.zeros_like(h) - samples, n = h.shape - for j in range(n): - order = rng.permutation(range(samples)) - h_rand_perm[:, j] = h[order, j] - return h_rand_perm - - class Lhs(InitialPointGenerator): """Latin hypercube sampling @@ -93,8 +84,7 @@ def generate(self, dimensions, n_samples, random_state=None): # Generate a random LHS h = self._lhs_normalized(n_dim, n_samples, rng) r = np.corrcoef(np.array(h).T) - if len(np.abs(r[r != 1])) > 0 and \ - np.max(np.abs(r[r != 1])) < mincorr: + if np.max(np.abs(r[r != 1])) < mincorr: mincorr = np.max(np.abs(r - np.eye(r.shape[0]))) h_opt = h.copy() h_opt = space.inverse_transform(h_opt) @@ -142,4 +132,4 @@ def _lhs_normalized(self, n_dim, n_samples, random_state): h[:, j] = u[:, j] * np.diff(x) + x[:n_samples] else: raise ValueError("Wrong lhs_type. Got ".format(self.lhs_type)) - return _random_permute_matrix(h, random_state=rng) + return random_permute_matrix(h, random_state=rng) diff --git a/skopt/tests/test_common.py b/skopt/tests/test_common.py index ac9cbdf78..9b299e43f 100644 --- a/skopt/tests/test_common.py +++ b/skopt/tests/test_common.py @@ -113,11 +113,11 @@ def test_minimizer_api_dummy_minimize(verbose, call): @pytest.mark.parametrize("minimizer", MINIMIZERS) def test_minimizer_api(verbose, call, minimizer): n_calls = 7 - n_initial_points = 3 - n_models = n_calls - n_initial_points + 1 + n_random_starts = 3 + n_models = n_calls - n_random_starts + 1 result = minimizer(branin, [(-5.0, 10.0), (0.0, 15.0)], - n_initial_points=n_initial_points, + n_random_starts=n_random_starts, n_calls=n_calls, random_state=1, verbose=verbose, callback=call) @@ -133,10 +133,10 @@ def test_minimizer_api(verbose, call, minimizer): def test_minimizer_api_random_only(minimizer): # no models should be fit as we only evaluate at random points n_calls = 5 - n_initial_points = 5 + n_random_starts = 5 result = minimizer(branin, [(-5.0, 10.0), (0.0, 15.0)], - n_initial_points=n_initial_points, + n_random_starts=n_random_starts, n_calls=n_calls, random_state=1) @@ -150,15 +150,15 @@ def test_fixed_random_states(minimizer): # check that two runs produce exactly same results, if not there is a # random state somewhere that is not reproducible n_calls = 4 - n_initial_points = 2 + n_random_starts = 2 space = [(-5.0, 10.0), (0.0, 15.0)] result1 = minimizer(branin, space, n_calls=n_calls, - n_initial_points=n_initial_points, random_state=1) + n_random_starts=n_random_starts, random_state=1) dimensions = [(-5.0, 10.0), (0.0, 15.0)] result2 = minimizer(branin, dimensions, n_calls=n_calls, - n_initial_points=n_initial_points, random_state=1) + n_random_starts=n_random_starts, random_state=1) assert_array_almost_equal(result1.x_iters, result2.x_iters) assert_array_almost_equal(result1.func_vals, result2.func_vals) @@ -170,28 +170,28 @@ def test_minimizer_with_space(minimizer): # check we can pass a Space instance as dimensions argument and get same # result n_calls = 4 - n_initial_points = 2 + n_random_starts = 2 space = Space([(-5.0, 10.0), (0.0, 15.0)]) 
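    # Both runs below share random_state=1, n_calls and n_random_starts,
    # so any difference in x_iters or func_vals can only come from how
    # `dimensions` was passed.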
space_result = minimizer(branin, space, n_calls=n_calls, - n_initial_points=n_initial_points, random_state=1) + n_random_starts=n_random_starts, random_state=1) check_minimizer_api(space_result, n_calls) check_minimizer_bounds(space_result, n_calls) dimensions = [(-5.0, 10.0), (0.0, 15.0)] result = minimizer(branin, dimensions, n_calls=n_calls, - n_initial_points=n_initial_points, random_state=1) + n_random_starts=n_random_starts, random_state=1) assert_array_almost_equal(space_result.x_iters, result.x_iters) assert_array_almost_equal(space_result.func_vals, result.func_vals) @pytest.mark.slow_test -@pytest.mark.parametrize("n_initial_points", [0, 1, 2, 3, 4]) +@pytest.mark.parametrize("n_random_starts", [0, 1, 2, 3, 4]) @pytest.mark.parametrize("optimizer_func", [gp_minimize, forest_minimize, gbrt_minimize]) -def test_init_vals_and_models(n_initial_points, optimizer_func): +def test_init_vals_and_models(n_random_starts, optimizer_func): # test how many models are fitted when using initial points, y0 values # and random starts space = [(-5.0, 10.0), (0.0, 15.0)] @@ -199,40 +199,40 @@ def test_init_vals_and_models(n_initial_points, optimizer_func): y0 = list(map(branin, x0)) n_calls = 7 - optimizer = partial(optimizer_func, n_initial_points=n_initial_points) + optimizer = partial(optimizer_func, n_random_starts=n_random_starts) res = optimizer(branin, space, x0=x0, y0=y0, random_state=0, n_calls=n_calls) - assert_equal(len(res.models), n_calls - n_initial_points + 1) + assert_equal(len(res.models), n_calls - n_random_starts + 1) @pytest.mark.slow_test -@pytest.mark.parametrize("n_initial_points", [0, 1, 2, 3, 4]) +@pytest.mark.parametrize("n_random_starts", [0, 1, 2, 3, 4]) @pytest.mark.parametrize("optimizer_func", [gp_minimize, forest_minimize, gbrt_minimize]) -def test_init_points_and_models(n_initial_points, optimizer_func): +def test_init_points_and_models(n_random_starts, optimizer_func): # test how many models are fitted when using initial points and random # starts (no y0 in this case) space = [(-5.0, 10.0), (0.0, 15.0)] x0 = [[1, 2], [3, 4], [5, 6]] n_calls = 7 - optimizer = partial(optimizer_func, n_initial_points=n_initial_points) + optimizer = partial(optimizer_func, n_random_starts=n_random_starts) res = optimizer(branin, space, x0=x0, random_state=0, n_calls=n_calls) - assert_equal(len(res.models), n_calls - len(x0) - n_initial_points + 1) + assert_equal(len(res.models), n_calls - len(x0) - n_random_starts + 1) @pytest.mark.slow_test -@pytest.mark.parametrize("n_initial_points", [0, 5]) +@pytest.mark.parametrize("n_random_starts", [0, 5]) @pytest.mark.parametrize("optimizer_func", [gp_minimize, forest_minimize, gbrt_minimize]) -def test_init_vals(n_initial_points, optimizer_func): +def test_init_vals(n_random_starts, optimizer_func): space = [(-5.0, 10.0), (0.0, 15.0)] x0 = [[1, 2], [3, 4], [5, 6]] - n_calls = len(x0) + n_initial_points + 1 + n_calls = len(x0) + n_random_starts + 1 - optimizer = partial(optimizer_func, n_initial_points=n_initial_points) + optimizer = partial(optimizer_func, n_random_starts=n_random_starts) check_init_vals(optimizer, branin, space, x0, n_calls) @@ -247,9 +247,9 @@ def test_init_vals_dummy_minimize(): @pytest.mark.slow_test @pytest.mark.parametrize("optimizer", [ dummy_minimize, - partial(gp_minimize, n_initial_points=0), - partial(forest_minimize, n_initial_points=0), - partial(gbrt_minimize, n_initial_points=0)]) + partial(gp_minimize, n_random_starts=0), + partial(forest_minimize, n_random_starts=0), + partial(gbrt_minimize, 
n_random_starts=0)]) def test_categorical_init_vals(optimizer): space = [("-2", "-1", "0", "1", "2")] x0 = [["0"], ["1"], ["2"]] @@ -260,9 +260,9 @@ def test_categorical_init_vals(optimizer): @pytest.mark.slow_test @pytest.mark.parametrize("optimizer", [ dummy_minimize, - partial(gp_minimize, n_initial_points=0), - partial(forest_minimize, n_initial_points=0), - partial(gbrt_minimize, n_initial_points=0)]) + partial(gp_minimize, n_random_starts=0), + partial(forest_minimize, n_random_starts=0), + partial(gbrt_minimize, n_random_starts=0)]) def test_mixed_spaces(optimizer): space = [("-2", "-1", "0", "1", "2"), (-2.0, 2.0)] x0 = [["0", 2.0], ["1", 1.0], ["2", 1.0]] @@ -326,24 +326,24 @@ def test_invalid_n_calls_arguments(minimizer): with pytest.raises(ValueError): minimizer(branin, [(-5.0, 10.0), (0.0, 15.0)], - n_initial_points=0, random_state=1) + n_random_starts=0, random_state=1) - # n_calls >= n_initial_points + # n_calls >= n_random_starts with pytest.raises(ValueError): minimizer(branin, [(-5.0, 10.0), (0.0, 15.0)], - n_calls=1, n_initial_points=10, random_state=1) + n_calls=1, n_random_starts=10, random_state=1) - # n_calls >= n_initial_points + len(x0) + # n_calls >= n_random_starts + len(x0) with pytest.raises(ValueError): minimizer(branin, [(-5.0, 10.0), (0.0, 15.0)], n_calls=1, x0=[[-1, 2], [-3, 3], [2, 5]], random_state=1, - n_initial_points=7) + n_random_starts=7) - # n_calls >= n_initial_points + # n_calls >= n_random_starts with pytest.raises(ValueError): minimizer(branin, [(-5.0, 10.0), (0.0, 15.0)], n_calls=1, x0=[[-1, 2], [-3, 3], [2, 5]], y0=[2.0, 3.0, 5.0], - random_state=1, n_initial_points=7) + random_state=1, n_random_starts=7) @pytest.mark.fast_test @@ -351,7 +351,7 @@ def test_invalid_n_calls_arguments(minimizer): def test_repeated_x(minimizer): with pytest.warns(None) as record: minimizer(lambda x: x[0], dimensions=[[0, 1]], x0=[[0], [1]], - n_initial_points=0, n_calls=3) + n_random_starts=0, n_calls=3) assert len(record) > 0 w = record.pop(UserWarning) assert issubclass(w.category, UserWarning) @@ -359,7 +359,7 @@ def test_repeated_x(minimizer): with pytest.warns(None) as record: minimizer(bench4, dimensions=[("0", "1")], x0=[["0"], ["1"]], - n_calls=3, n_initial_points=0) + n_calls=3, n_random_starts=0) assert len(record) > 0 w = record.pop(UserWarning) assert issubclass(w.category, UserWarning) @@ -376,23 +376,23 @@ def test_consistent_x_iter_dimensions(minimizer): res = minimizer(bench1, dimensions=[(0, 1), (2, 3)], x0=[[0, 2], [1, 2]], n_calls=3, - n_initial_points=0) + n_random_starts=0) assert len(set(len(x) for x in res.x_iters)) == 1 assert len(res.x_iters[0]) == 2 # one dimensional problem res = minimizer(bench1, dimensions=[(0, 1)], x0=[[0], [1]], n_calls=3, - n_initial_points=0) + n_random_starts=0) assert len(set(len(x) for x in res.x_iters)) == 1 assert len(res.x_iters[0]) == 1 with pytest.raises(RuntimeError): minimizer(bench1, dimensions=[(0, 1)], - x0=[[0, 1]], n_calls=3, n_initial_points=0) + x0=[[0, 1]], n_calls=3, n_random_starts=0) with pytest.raises(RuntimeError): minimizer(bench1, dimensions=[(0, 1)], - x0=[0, 1], n_calls=3, n_initial_points=0) + x0=[0, 1], n_calls=3, n_random_starts=0) @pytest.mark.slow_test @@ -405,7 +405,7 @@ def test_early_stopping_delta_x(minimizer): dimensions=[(-1., 1.)], x0=[[-0.1], [0.1], [-0.9]], n_calls=n_calls, - n_initial_points=0, random_state=1) + n_random_starts=0, random_state=1) assert len(res.x_iters) < n_calls @@ -420,7 +420,7 @@ def test_early_stopping_delta_x_empty_result_object(minimizer): 
callback=DeltaXStopper(0.1), dimensions=[(-1., 1.)], n_calls=n_calls, - n_initial_points=1, random_state=1) + n_random_starts=1, random_state=1) assert len(res.x_iters) < n_calls @@ -433,6 +433,6 @@ def bench1_with_time(x): n_calls = 3 res = minimizer(bench1_with_time, [(-2.0, 2.0)], - acq_func=acq_func, n_calls=n_calls, n_initial_points=1, + acq_func=acq_func, n_calls=n_calls, n_random_starts=1, random_state=1) assert len(res.log_time) == n_calls diff --git a/skopt/tests/test_forest_opt.py b/skopt/tests/test_forest_opt.py index b0aaf2566..204bee108 100644 --- a/skopt/tests/test_forest_opt.py +++ b/skopt/tests/test_forest_opt.py @@ -28,11 +28,11 @@ def test_forest_minimize_api(base_estimator): def check_minimize(minimizer, func, y_opt, dimensions, margin, - n_calls, n_initial_points=10, x0=None): + n_calls, n_random_starts=10, x0=None): for n in range(3): r = minimizer( func, dimensions, n_calls=n_calls, random_state=n, - n_initial_points=n_initial_points, x0=x0) + n_random_starts=n_random_starts, x0=x0) assert r.fun < y_opt + margin @@ -64,5 +64,5 @@ def f(params): dims = [[1]] res = forest_minimize(f, dims, n_calls=1, random_state=1, - n_initial_points=1) + n_random_starts=1) assert res.x_iters[0][0] == dims[0][0] diff --git a/skopt/tests/test_gp_opt.py b/skopt/tests/test_gp_opt.py index ffd5f356f..8734aff5d 100644 --- a/skopt/tests/test_gp_opt.py +++ b/skopt/tests/test_gp_opt.py @@ -12,9 +12,9 @@ def check_minimize(func, y_opt, bounds, acq_optimizer, acq_func, - margin, n_calls, n_initial_points=10, init_gen="random"): + margin, n_calls, n_random_starts=10, init_gen="random"): r = gp_minimize(func, bounds, acq_optimizer=acq_optimizer, - acq_func=acq_func, n_initial_points=n_initial_points, + acq_func=acq_func, n_random_starts=n_random_starts, n_calls=n_calls, random_state=1, initial_point_generator=init_gen, noise=1e-10) @@ -72,10 +72,10 @@ def test_gp_minimize_bench4(search, acq): @pytest.mark.fast_test def test_n_jobs(): r_single = gp_minimize(bench3, [(-2.0, 2.0)], acq_optimizer="lbfgs", - acq_func="EI", n_calls=2, n_initial_points=1, + acq_func="EI", n_calls=2, n_random_starts=1, random_state=1, noise=1e-10) r_double = gp_minimize(bench3, [(-2.0, 2.0)], acq_optimizer="lbfgs", - acq_func="EI", n_calls=2, n_initial_points=1, + acq_func="EI", n_calls=2, n_random_starts=1, random_state=1, noise=1e-10, n_jobs=2) assert_array_equal(r_single.x_iters, r_double.x_iters) @@ -83,7 +83,7 @@ def test_n_jobs(): @pytest.mark.fast_test def test_gpr_default(): """Smoke test that gp_minimize does not fail for default values.""" - gp_minimize(branin, ((-5.0, 10.0), (0.0, 15.0)), n_initial_points=1, + gp_minimize(branin, ((-5.0, 10.0), (0.0, 15.0)), n_random_starts=1, n_calls=2) @@ -95,7 +95,7 @@ def test_use_given_estimator(): noise_correct = 1e+5 noise_fake = 1e-10 estimator = cook_estimator("GP", domain, noise=noise_correct) - res = gp_minimize(branin, domain, n_calls=1, n_initial_points=1, + res = gp_minimize(branin, domain, n_calls=1, n_random_starts=1, base_estimator=estimator, noise=noise_fake) assert res['models'][-1].noise == noise_correct @@ -109,7 +109,7 @@ def test_use_given_estimator_with_max_model_size(): noise_correct = 1e+5 noise_fake = 1e-10 estimator = cook_estimator("GP", domain, noise=noise_correct) - res = gp_minimize(branin, domain, n_calls=1, n_initial_points=1, + res = gp_minimize(branin, domain, n_calls=1, n_random_starts=1, base_estimator=estimator, noise=noise_fake, model_queue_size=1) assert len(res['models']) == 1 @@ -122,7 +122,7 @@ def f(params): return 0 dims = [[1]] - 
res = gp_minimize(f, dims, n_calls=1, n_initial_points=1, + res = gp_minimize(f, dims, n_calls=1, n_random_starts=1, random_state=1) assert res.x_iters[0][0] == dims[0][0] diff --git a/skopt/tests/test_sampler.py b/skopt/tests/test_sampler.py deleted file mode 100644 index e8cafb143..000000000 --- a/skopt/tests/test_sampler.py +++ /dev/null @@ -1,282 +0,0 @@ -import pytest -import numbers -import numpy as np -import os -import yaml -from tempfile import NamedTemporaryFile - -from numpy.testing import assert_array_almost_equal -from numpy.testing import assert_almost_equal -from numpy.testing import assert_array_equal -from numpy.testing import assert_equal -from numpy.testing import assert_raises -from scipy import spatial -from skopt import Optimizer -from skopt.space import Space -from skopt.space import Real -from skopt.space import Integer -from skopt.space import Categorical -from skopt.space import check_dimension as space_check_dimension -from skopt.sampler.sobol import _bit_lo0, _bit_hi1 -from skopt.sampler.halton import _van_der_corput_samples, _create_primes -from skopt.sampler import Hammersly, Halton, Lhs, Sobol, Grid -from skopt.sampler import InitialPointGenerator -from skopt.sampler.grid import _create_uniform_grid_include_border -from skopt.sampler.grid import _create_uniform_grid_exclude_border -from skopt.sampler.grid import _quadrature_combine -from skopt.sampler.grid import _create_uniform_grid_only_border -from skopt.utils import cook_initial_point_generator - - -LHS_TYPE = ["classic", "centered"] -CRITERION = ["maximin", "ratio", "correlation", None] -SAMPLER = ["lhs", "halton", "sobol", "hammersly", "grid"] - - -@pytest.mark.fast_test -def test_lhs_centered(): - lhs = Lhs(lhs_type="centered") - samples = lhs.generate([(0., 1.), ] * 3, 3) - assert_almost_equal(np.sum(samples), 4.5) - - -@pytest.mark.parametrize("samlper", SAMPLER) -def test_sampler(samlper): - s = cook_initial_point_generator(samlper) - samples = s.generate([(0., 1.), ] * 2, 200) - assert len(samples) == 200 - assert len(samples[0]) == 2 - assert isinstance(s, InitialPointGenerator) - - samples = s.generate([("a", "b", "c")], 3) - assert samples[0][0] in ["a", "b", "c"] - - samples = s.generate([("a", "b", "c"), (0, 1)], 1) - assert samples[0][0] in ["a", "b", "c"] - assert samples[0][1] in [0, 1] - - samples = s.generate([("a", "b", "c"), (0, 1)], 3) - assert samples[0][0] in ["a", "b", "c"] - assert samples[0][1] in [0, 1] - - -@pytest.mark.parametrize("lhs_type", LHS_TYPE) -@pytest.mark.parametrize("criterion", CRITERION) -def test_lhs_criterion(lhs_type, criterion): - lhs = Lhs(lhs_type=lhs_type, criterion=criterion, iterations=100) - samples = lhs.generate([(0., 1.), ] * 2, 200) - assert len(samples) == 200 - assert len(samples[0]) == 2 - samples = lhs.generate([("a", "b", "c")], 3) - assert samples[0][0] in ["a", "b", "c"] - - samples = lhs.generate([("a", "b", "c"), (0, 1)], 1) - assert samples[0][0] in ["a", "b", "c"] - assert samples[0][1] in [0, 1] - - samples = lhs.generate([("a", "b", "c"), (0, 1)], 3) - assert samples[0][0] in ["a", "b", "c"] - assert samples[0][1] in [0, 1] - - -def test_lhs_pdist(): - n_dim = 2 - n_samples = 20 - lhs = Lhs() - - h = lhs._lhs_normalized(n_dim, n_samples, 0) - d_classic = spatial.distance.pdist(np.array(h), 'euclidean') - lhs = Lhs(criterion="maximin", iterations=100) - h = lhs.generate([(0., 1.), ] * n_dim, n_samples, random_state=0) - d = spatial.distance.pdist(np.array(h), 'euclidean') - assert np.min(d) > np.min(d_classic) - - 
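The pdist test above encodes the defining property of the "maximin" criterion: the optimized design's smallest pairwise distance should beat that of a plain LHS. A minimal standalone sketch of the same check, assuming the `skopt.sampler.Lhs` API used throughout this series (this file is removed here and re-added later in the patch set):

import numpy as np
from scipy.spatial.distance import pdist
from skopt.sampler import Lhs

# Plain LHS: one uniform random draw per stratum, columns permuted.
h_classic = Lhs().generate([(0., 1.)] * 2, 20, random_state=0)
# "maximin" re-draws candidate designs for `iterations` rounds and keeps
# the one whose smallest pairwise distance is largest.
h_maximin = Lhs(criterion="maximin", iterations=100).generate(
    [(0., 1.)] * 2, 20, random_state=0)
print(np.min(pdist(h_classic)), "<", np.min(pdist(h_maximin)))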
-@pytest.mark.parametrize("criterion", CRITERION) -def test_lhs_random_state(criterion): - n_dim = 2 - n_samples = 20 - lhs = Lhs() - - h = lhs._lhs_normalized(n_dim, n_samples, 0) - h2 = lhs._lhs_normalized(n_dim, n_samples, 0) - assert_array_equal(h, h2) - lhs = Lhs(criterion=criterion, iterations=100) - h = lhs.generate([(0., 1.), ] * n_dim, n_samples, random_state=0) - h2 = lhs.generate([(0., 1.), ] * n_dim, n_samples, random_state=0) - assert_array_equal(h, h2) - - -@pytest.mark.fast_test -def test_bit(): - X = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - res = [2, 1, 3, 1, 2, 1, 4, 1, 2, 1] - for i in range(len(X)): - assert _bit_lo0(X[i]) == res[i] - - X = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - res = [1, 2, 2, 3, 3, 3, 3, 4, 4, 4] - for i in range(len(X)): - assert _bit_hi1(X[i]) == res[i] - - -@pytest.mark.fast_test -def test_sobol(): - sobol = Sobol() - x, seed = sobol._sobol(3, 1) - assert_array_equal(x, [0.5, 0.5, 0.5]) - x, seed = sobol._sobol(3, 2) - assert_array_equal(x, [0.75, 0.25, 0.75]) - x, seed = sobol._sobol(3, 3) - assert_array_equal(x, [0.25, 0.75, 0.25]) - x, seed = sobol._sobol(3, 4) - assert_array_equal(x, [0.375, 0.375, 0.625]) - x, seed = sobol._sobol(3, 5) - assert_array_equal(x, [0.875, 0.875, 0.125]) - x, seed = sobol._sobol(3, 6) - assert_array_equal(x, [0.625, 0.125, 0.375]) - - -@pytest.mark.fast_test -def test_generate(): - sobol = Sobol(min_skip=1, max_skip=1) - x = sobol.generate([(0., 1.), ] * 3, 3) - x = np.array(x) - assert_array_equal(x[0, :], [0.5, 0.5, 0.5]) - assert_array_equal(x[1, :], [0.75, 0.25, 0.75]) - assert_array_equal(x[2, :], [0.25, 0.75, 0.25]) - - sobol.set_params(max_skip=2) - assert sobol.max_skip == 2 - assert isinstance(sobol, InitialPointGenerator) - - -@pytest.mark.fast_test -def test_van_der_corput(): - x = _van_der_corput_samples(range(11), number_base=10) - y = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.01, 0.11] - assert_array_equal(x, y) - - x = _van_der_corput_samples(range(8), number_base=2) - y = [0.5, 0.25, 0.75, 0.125, 0.625, 0.375, 0.875, 0.0625] - assert_array_equal(x, y) - - -@pytest.mark.fast_test -def test_halton(): - h = Halton() - x = h.generate([(0., 1.), ] * 2, 3) - y = np.array([[0.125, 0.625, 0.375], [0.4444, 0.7778, 0.2222]]).T - assert_array_almost_equal(x, y, 1e-3) - - h = Halton() - x = h.generate([(0., 1.), ] * 2, 4) - y = np.array([[0.125, 0.625, 0.375, 0.875], - [0.4444, 0.7778, 0.2222, 0.5556]]).T - assert_array_almost_equal(x, y, 1e-3) - - samples = h.generate([(0., 1.), ] * 2, 200) - assert len(samples) == 200 - assert len(samples[0]) == 2 - - -@pytest.mark.fast_test -def test_hammersly(): - h = Hammersly() - x = h.generate([(0., 1.), ] * 2, 3) - y = np.array([[0.75, 0.125, 0.625], [0.25, 0.5, 0.75]]).T - assert_almost_equal(x, y) - x = h.generate([(0., 1.), ] * 2, 4) - y = np.array([[0.75, 0.125, 0.625, 0.375], [0.2, 0.4, 0.6, 0.8]]).T - assert_almost_equal(x, y) - - samples = h.generate([(0., 1.), ] * 2, 200) - assert len(samples) == 200 - assert len(samples[0]) == 2 - - -@pytest.mark.fast_test -def test_primes(): - - x = _create_primes(1) - assert_array_equal(x, []) - x = _create_primes(2) - assert_array_equal(x, [2]) - x = _create_primes(3) - assert_array_equal(x, [2, 3]) - x = _create_primes(20) - assert_array_equal(x, [2, 3, 5, 7, 11, 13, 17, 19]) - - -@pytest.mark.fast_test -def test_quadrature_combine(): - a = [1, 2] - b = [[4, 4], [5, 6]] - x = [[1, 4, 4], [1, 5, 6], [2, 4, 4], [2, 5, 6]] - x_test = _quadrature_combine([a, b]) - assert_array_equal(x_test, x) - - -@pytest.mark.fast_test -def 
test_uniform_grid(): - x = _create_uniform_grid_exclude_border(1, 2) - assert_array_equal(x, [[1./3.], [2./3.]]) - x = _create_uniform_grid_include_border(1, 2) - assert_array_equal(x, [[0.], [1.]]) - x = _create_uniform_grid_only_border(1, 2) - assert_array_equal(x, [[0.], [1.]]) - - x = _create_uniform_grid_exclude_border(1, 3) - assert_array_equal(x, [[1./4.], [2./4.], [3./4.]]) - x = _create_uniform_grid_include_border(1, 3) - assert_array_equal(x, [[0./2.], [1./2.], [2./2.]]) - x = _create_uniform_grid_only_border(1, 3) - assert_array_equal(x, [[0./2.], [1./2.], [2./2.]]) - - x = _create_uniform_grid_exclude_border(1, 5) - assert_array_equal(x, [[1./6.], [2./6.], [3./6.], [4./6.], [5./6.]]) - x = _create_uniform_grid_include_border(1, 5) - assert_array_equal(x, [[0./4.], [1./4.], [2./4.], [3./4.], [4./4.]]) - x = _create_uniform_grid_only_border(1, 5) - assert_array_equal(x, [[0./4.], [1./4.], [2./4.], [3./4.], [4./4.]]) - - x = _create_uniform_grid_exclude_border(2, 2) - assert_array_equal(x, [[1. / 3., 1./3.], [1. / 3., 2. / 3.], - [2. / 3., 1. / 3.], [2. / 3., 2. / 3.]]) - x = _create_uniform_grid_include_border(2, 2) - assert_array_equal(x, [[0., 0.], [0., 1.], - [1., 0.], [1., 1.]]) - x = _create_uniform_grid_only_border(2, 3) - assert_array_equal(x, [[0., 0.], [0., 0.5], - [0., 1.], [1., 0.], - [1., 0.5], [1., 1.]]) - - assert_raises(AssertionError, _create_uniform_grid_exclude_border, 1, 0) - assert_raises(AssertionError, _create_uniform_grid_exclude_border, 0, 1) - assert_raises(AssertionError, _create_uniform_grid_include_border, 1, 0) - assert_raises(AssertionError, _create_uniform_grid_include_border, 0, 1) - assert_raises(AssertionError, _create_uniform_grid_only_border, 1, 1) - assert_raises(AssertionError, _create_uniform_grid_only_border, 0, 2) - - -@pytest.mark.fast_test -def test_grid(): - grid = Grid() - samples = grid.generate([(0., 1.), ] * 2, 200) - assert len(samples) == 200 - assert len(samples[0]) == 2 - - grid = Grid(border="include") - samples = grid.generate([(0., 1.), ] * 2, 200) - assert len(samples) == 200 - assert len(samples[0]) == 2 - - grid = Grid(use_full_layout=False) - samples = grid.generate([(0., 1.), ] * 2, 200) - assert len(samples) == 200 - assert len(samples[0]) == 2 - - grid = Grid(use_full_layout=True, append_border="include") - samples = grid.generate([(0., 1.), ] * 2, 200) - assert len(samples) == 200 - assert len(samples[0]) == 2 diff --git a/skopt/tests/test_samples.py b/skopt/tests/test_samples.py new file mode 100644 index 000000000..bae6a1617 --- /dev/null +++ b/skopt/tests/test_samples.py @@ -0,0 +1,176 @@ +import pytest +import numbers +import numpy as np +import os +import yaml +from tempfile import NamedTemporaryFile + +from numpy.testing import assert_array_almost_equal +from numpy.testing import assert_almost_equal +from numpy.testing import assert_array_equal +from numpy.testing import assert_equal +from numpy.testing import assert_raises_regex +from scipy import spatial +from skopt import Optimizer +from skopt.space import Space +from skopt.space import Real +from skopt.space import Integer +from skopt.space import Categorical +from skopt.space import check_dimension as space_check_dimension +from skopt.sampler.sobol import _bit_lo0, _bit_hi1 +from skopt.sampler.halton import _van_der_corput_samples, _create_primes +from skopt.sampler import Hammersly, Halton, Lhs, Sobol + + +@pytest.mark.fast_test +def test_lhs_type(): + lhs = Lhs(lhs_type="classic") + samples = lhs.generate([(0., 1.), ] * 2, 200) + assert len(samples) == 
200 + assert len(samples[0]) == 2 + lhs = Lhs(lhs_type="centered") + samples = lhs.generate([(0., 1.), ] * 3, 3) + assert_almost_equal(np.sum(samples), 4.5) + samples = lhs.generate([("a", "b", "c")], 3) + assert samples[0][0] in ["a", "b", "c"] + + samples = lhs.generate([("a", "b", "c"), (0, 1)], 1) + assert samples[0][0] in ["a", "b", "c"] + assert samples[0][1] in [0, 1] + + samples = lhs.generate([("a", "b", "c"), (0, 1)], 3) + assert samples[0][0] in ["a", "b", "c"] + assert samples[0][1] in [0, 1] + + +def test_lhs_criterion(): + for criterion in ["maximin", "ratio", "correlation"]: + lhs = Lhs(criterion=criterion, iterations=100) + samples = lhs.generate([(0., 1.), ] * 2, 200) + assert len(samples) == 200 + assert len(samples[0]) == 2 + + +def test_lhs_pdist(): + n_dim = 2 + n_samples = 20 + lhs = Lhs() + + h = lhs._lhs_normalized(n_dim, n_samples, 0) + d_classic = spatial.distance.pdist(np.array(h), 'euclidean') + lhs = Lhs(criterion="maximin", iterations=100) + h = lhs.generate([(0., 1.), ] * n_dim, n_samples, random_state=0) + d = spatial.distance.pdist(np.array(h), 'euclidean') + assert np.min(d) > np.min(d_classic) + + +def test_lhs_random_state(): + n_dim = 2 + n_samples = 20 + lhs = Lhs() + + h = lhs._lhs_normalized(n_dim, n_samples, 0) + h2 = lhs._lhs_normalized(n_dim, n_samples, 0) + assert_array_equal(h, h2) + for criterion in ["maximin", "ratio", "correlation"]: + lhs = Lhs(criterion=criterion, iterations=100) + h = lhs.generate([(0., 1.), ] * n_dim, n_samples, random_state=0) + h2 = lhs.generate([(0., 1.), ] * n_dim, n_samples, random_state=0) + assert_array_equal(h, h2) + + +@pytest.mark.fast_test +def test_bit(): + X = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + res = [2, 1, 3, 1, 2, 1, 4, 1, 2, 1] + for i in range(len(X)): + assert _bit_lo0(X[i]) == res[i] + + X = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + res = [1, 2, 2, 3, 3, 3, 3, 4, 4, 4] + for i in range(len(X)): + assert _bit_hi1(X[i]) == res[i] + + +@pytest.mark.fast_test +def test_sobol(): + sobol = Sobol() + x, seed = sobol._sobol(3, 1) + assert_array_equal(x, [0.5, 0.5, 0.5]) + x, seed = sobol._sobol(3, 2) + assert_array_equal(x, [0.75, 0.25, 0.75]) + x, seed = sobol._sobol(3, 3) + assert_array_equal(x, [0.25, 0.75, 0.25]) + x, seed = sobol._sobol(3, 4) + assert_array_equal(x, [0.375, 0.375, 0.625]) + x, seed = sobol._sobol(3, 5) + assert_array_equal(x, [0.875, 0.875, 0.125]) + x, seed = sobol._sobol(3, 6) + assert_array_equal(x, [0.625, 0.125, 0.375]) + + +@pytest.mark.fast_test +def test_generate(): + sobol = Sobol(min_skip=1, max_skip=1) + x = sobol.generate([(0., 1.), ] * 3, 3) + x = np.array(x) + assert_array_equal(x[0, :], [0.5, 0.5, 0.5]) + assert_array_equal(x[1, :], [0.75, 0.25, 0.75]) + assert_array_equal(x[2, :], [0.25, 0.75, 0.25]) + + +@pytest.mark.fast_test +def test_van_der_corput(): + x = _van_der_corput_samples(range(11), number_base=10) + y = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.01, 0.11] + assert_array_equal(x, y) + + x = _van_der_corput_samples(range(8), number_base=2) + y = [0.5, 0.25, 0.75, 0.125, 0.625, 0.375, 0.875, 0.0625] + assert_array_equal(x, y) + + +@pytest.mark.fast_test +def test_halton(): + h = Halton() + x = h.generate([(0., 1.), ] * 2, 3) + y = np.array([[0.125, 0.625, 0.375], [0.4444, 0.7778, 0.2222]]).T + assert_array_almost_equal(x, y, 1e-3) + + h = Halton() + x = h.generate([(0., 1.), ] * 2, 4) + y = np.array([[0.125, 0.625, 0.375, 0.875], + [0.4444, 0.7778, 0.2222, 0.5556]]).T + assert_array_almost_equal(x, y, 1e-3) + + samples = h.generate([(0., 1.), ] * 2, 200) + assert 
len(samples) == 200 + assert len(samples[0]) == 2 + + +@pytest.mark.fast_test +def test_hammersly(): + h = Hammersly() + x = h.generate([(0., 1.), ] * 2, 3) + y = np.array([[0.75, 0.125, 0.625], [0.25, 0.5, 0.75]]).T + assert_almost_equal(x, y) + x = h.generate([(0., 1.), ] * 2, 4) + y = np.array([[0.75, 0.125, 0.625, 0.375], [0.2, 0.4, 0.6, 0.8]]).T + assert_almost_equal(x, y) + + samples = h.generate([(0., 1.), ] * 2, 200) + assert len(samples) == 200 + assert len(samples[0]) == 2 + + +@pytest.mark.fast_test +def test_primes(): + + x = _create_primes(1) + assert_array_equal(x, []) + x = _create_primes(2) + assert_array_equal(x, [2]) + x = _create_primes(3) + assert_array_equal(x, [2, 3]) + x = _create_primes(20) + assert_array_equal(x, [2, 3, 5, 7, 11, 13, 17, 19]) diff --git a/skopt/utils.py b/skopt/utils.py index 6c769d731..395413acf 100644 --- a/skopt/utils.py +++ b/skopt/utils.py @@ -16,8 +16,7 @@ from .learning.gaussian_process.kernels import ConstantKernel from .learning.gaussian_process.kernels import HammingKernel from .learning.gaussian_process.kernels import Matern -from .sampler import Sobol, Lhs, Hammersly, Halton, Grid -from .sampler import InitialPointGenerator + from .space import Space, Categorical, Integer, Real, Dimension @@ -343,7 +342,7 @@ def cook_estimator(base_estimator, space=None, **kwargs): Parameters ---------- base_estimator : "GP", "RF", "ET", "GBRT", "DUMMY" - or sklearn regressor + or sklearn regressor, default="GP" Should inherit from `sklearn.base.RegressorMixin`. In addition the `predict` method should have an optional `return_std` argument, which returns `std(Y | x)`` along with `E[Y | x]`. @@ -407,52 +406,6 @@ def cook_estimator(base_estimator, space=None, **kwargs): return base_estimator -def cook_initial_point_generator(generator, **kwargs): - """ - Cook a default initial point generator. - - For the special generator called "random" the return value is None. - - Parameters - ---------- - generator : "lhs", "sobol", "halton", "hammersly", "grid", "random" - or InitialPointGenerator instance" - Should inherit from `skopt.sampler.InitialPointGenerator`. - - kwargs : dict - Extra parameters provided to the generator at init time. - """ - if generator is None: - generator = "random" - elif isinstance(generator, str): - generator = generator.lower() - if generator not in ["sobol", "halton", "hammersly", "lhs", "random", - "grid"]: - raise ValueError("Valid strings for the generator parameter " - " are: 'sobol', 'lhs', 'halton', 'hammersly'," - "'random', or 'grid' not " - "%s." % generator) - elif not isinstance(generator, InitialPointGenerator): - raise ValueError("generator has to be an InitialPointGenerator." - "Got %s" % (str(type(generator)))) - - if isinstance(generator, str): - if generator == "sobol": - generator = Sobol() - elif generator == "halton": - generator = Halton() - elif generator == "hammersly": - generator = Hammersly() - elif generator == "lhs": - generator = Lhs() - elif generator == "grid": - generator = Grid() - elif generator == "random": - return None - generator.set_params(**kwargs) - return generator - - def dimensions_aslist(search_space): """Convert a dict representation of a search space into a list of dimensions, ordered by sorted(search_space.keys()). 
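For context on the function whose docstring begins above, a quick usage sketch of `dimensions_aslist` (a sketch assuming only the dict-to-list behaviour the docstring describes; the dict keys and bounds are illustrative):

from skopt.space import Integer, Real
from skopt.utils import dimensions_aslist

search_space = {"learning_rate": Real(1e-4, 1e-1, prior="log-uniform"),
                "max_depth": Integer(1, 8)}
# Keys are sorted alphabetically, so the result is
# [Real(learning_rate), Integer(max_depth)].
dims = dimensions_aslist(search_space)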
@@ -815,3 +768,13 @@ def wrapper(x): return wrapper return decorator + + +def random_permute_matrix(h, random_state=None): + rng = check_random_state(random_state) + h_rand_perm = np.zeros_like(h) + samples, n = h.shape + for j in range(n): + order = rng.permutation(range(samples)) + h_rand_perm[:, j] = h[order, j] + return h_rand_perm From 24b57d44d27fc6a9a9c4dff4cac5c7c7d8025e67 Mon Sep 17 00:00:00 2001 From: holgern Date: Thu, 20 Feb 2020 20:15:15 +0100 Subject: [PATCH 101/265] Revert "Revert "Merge remote-tracking branch 'origin/Improve_circle_ci' into Improve_circle_ci"" This reverts commit ddca7f56ce7a154c2284d6464cf872a1f4992f3d. --- .travis.yml | 8 +- build_tools/travis/test_script.sh | 7 +- doc/modules/classes.rst | 1 + doc/modules/transformers.rst | 7 + .../initial-sampling-method-integer.py | 13 +- examples/sampler/initial-sampling-method.py | 13 +- examples/sampler/sampling_comparison.py | 59 ++-- pyproject.toml | 2 +- requirements.txt | 2 +- setup.py | 2 +- skopt/__init__.py | 2 +- skopt/optimizer/base.py | 45 ++- skopt/optimizer/dummy.py | 19 +- skopt/optimizer/forest.py | 35 ++- skopt/optimizer/gbrt.py | 36 ++- skopt/optimizer/gp.py | 35 ++- skopt/optimizer/optimizer.py | 51 +--- skopt/sampler/__init__.py | 5 +- skopt/sampler/base.py | 24 ++ skopt/sampler/grid.py | 170 +++++++++++ skopt/sampler/halton.py | 1 + skopt/sampler/lhs.py | 16 +- skopt/tests/test_common.py | 90 +++--- skopt/tests/test_forest_opt.py | 6 +- skopt/tests/test_gp_opt.py | 16 +- skopt/tests/test_sampler.py | 282 ++++++++++++++++++ skopt/tests/test_samples.py | 176 ----------- skopt/utils.py | 61 +++- 28 files changed, 786 insertions(+), 398 deletions(-) create mode 100644 doc/modules/transformers.rst create mode 100644 skopt/sampler/grid.py create mode 100644 skopt/tests/test_sampler.py delete mode 100644 skopt/tests/test_samples.py diff --git a/.travis.yml b/.travis.yml index 871afe045..6c76b4ceb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,25 +22,25 @@ matrix: - name: "Python 3.5 - scikit 0.19.2" python: "3.7" env: DISTRIB="conda" PYTHON_VERSION="3.5" - NUMPY_VERSION="1.11.0" SCIPY_VERSION="0.18.0" PYAML_VERSION="16.9.0" + NUMPY_VERSION="1.12.0" SCIPY_VERSION="0.18.0" PYAML_VERSION="16.9.0" SCIKIT_LEARN_VERSION="0.19.2" MATPLOTLIB_VERSION="2.0.0" COVERAGE="false" JOBLIB_VERSION="0.11" - name: "Python 3.6 - scikit 0.20.4" python: "3.7" env: DISTRIB="conda" PYTHON_VERSION="3.6" - NUMPY_VERSION="1.12.0" SCIPY_VERSION="0.19.1" PYAML_VERSION="16.12.0" + NUMPY_VERSION="1.14.0" SCIPY_VERSION="0.19.1" PYAML_VERSION="16.12.0" SCIKIT_LEARN_VERSION="0.20.4" MATPLOTLIB_VERSION="2.0.0" COVERAGE="false" JOBLIB_VERSION="0.11" - name: "Python 3.7 - scikit 0.21.3" python: "3.7" env: DISTRIB="conda" PYTHON_VERSION="3.7" - NUMPY_VERSION="1.13.0" SCIPY_VERSION="1.0.0" PYAML_VERSION="17.8.0" + NUMPY_VERSION="1.15.0" SCIPY_VERSION="1.0.0" PYAML_VERSION="17.8.0" SCIKIT_LEARN_VERSION="0.21.3" MATPLOTLIB_VERSION="*" COVERAGE="true" JOBLIB_VERSION="0.12" - name: "Python 3.7 - scikit 0.22.1" python: "3.7" env: DISTRIB="conda" PYTHON_VERSION="3.7" - NUMPY_VERSION="1.14.0" SCIPY_VERSION="1.2.0" PYAML_VERSION="18.11.0" + NUMPY_VERSION="1.16.0" SCIPY_VERSION="1.2.0" PYAML_VERSION="18.11.0" SCIKIT_LEARN_VERSION="0.22.1" MATPLOTLIB_VERSION="*" COVERAGE="true" JOBLIB_VERSION="0.13" - name: "Python 3.8 latest package versions" diff --git a/build_tools/travis/test_script.sh b/build_tools/travis/test_script.sh index a14e50706..b5c05b033 100644 --- a/build_tools/travis/test_script.sh +++ b/build_tools/travis/test_script.sh @@ -11,10 
+11,11 @@ set -e python --version python -c "import numpy; print('numpy %s' % numpy.__version__)" python -c "import scipy; print('scipy %s' % scipy.__version__)" +python -c "import sklearn; print('sklearn %s' % sklearn.__version__)" python -c "\ try: - import sklearn - print('sklearn %s' % sklearn.__version__) + import skopt + print('skopt %s' % skopt.__version__) except ImportError: pass " @@ -36,7 +37,7 @@ run_tests() { export SKOPT_SKIP_NETWORK_TESTS=1 if [[ "$COVERAGE" == "true" ]]; then - TEST_CMD="$TEST_CMD --cov sklearn" + TEST_CMD="$TEST_CMD --cov skopt" fi if [[ -n "$CHECK_WARNINGS" ]]; then diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index b255dfbb6..f90ebba73 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -212,6 +212,7 @@ details. :template: function.rst utils.cook_estimator + utils.cook_initial_point_generator utils.dimensions_aslist utils.expected_minimum utils.expected_minimum_random_sampling diff --git a/doc/modules/transformers.rst b/doc/modules/transformers.rst new file mode 100644 index 000000000..34693f287 --- /dev/null +++ b/doc/modules/transformers.rst @@ -0,0 +1,7 @@ +.. currentmodule:: skopt.space.transformers + +.. _transformers: + +Transformers +============ + diff --git a/examples/sampler/initial-sampling-method-integer.py b/examples/sampler/initial-sampling-method-integer.py index 7430584ba..c66a9508b 100644 --- a/examples/sampler/initial-sampling-method-integer.py +++ b/examples/sampler/initial-sampling-method-integer.py @@ -31,6 +31,7 @@ from skopt.sampler import Lhs from skopt.sampler import Halton from skopt.sampler import Hammersly +from skopt.sampler import Grid from scipy.spatial.distance import pdist ############################################################################# @@ -50,7 +51,6 @@ def plot_searchspace(x, title): n_samples = 10 space = Space([(0, 5), (0, 5)]) -space.set_transformer("normalize") ############################################################################# # Random sampling @@ -151,6 +151,17 @@ def plot_searchspace(x, title): pdist_data.append(pdist(x).flatten()) x_label.append("hammersly") +############################################################################# +# Grid sampling +# ------------- + +grid = Grid(border="include", use_full_layout=False) +x = grid.generate(space.dimensions, n_samples) +plot_searchspace(x, 'Grid') +print("empty fields: %d" % (36 - np.size(np.unique(x, axis=0), 0))) +pdist_data.append(pdist(x).flatten()) +x_label.append("grid") + ############################################################################# # Pdist boxplot of all methods # ---------------------------- diff --git a/examples/sampler/initial-sampling-method.py b/examples/sampler/initial-sampling-method.py index bbb33e4c5..ad0e7f903 100644 --- a/examples/sampler/initial-sampling-method.py +++ b/examples/sampler/initial-sampling-method.py @@ -32,6 +32,7 @@ from skopt.sampler import Lhs from skopt.sampler import Halton from skopt.sampler import Hammersly +from skopt.sampler import Grid from scipy.spatial.distance import pdist ############################################################################# @@ -50,7 +51,7 @@ def plot_searchspace(x, title): n_samples = 10 space = Space([(-5., 10.), (0., 15.)]) -space.set_transformer("normalize") +# space.set_transformer("normalize") ############################################################################# # Random sampling @@ -142,6 +143,16 @@ def plot_searchspace(x, title): pdist_data.append(pdist(x).flatten()) x_label.append("hammersly") 
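For intuition on the `border` option used by the Grid block added just below: the three private grid helpers differ only in whether the endpoints of the unit interval are part of the layout. A sketch on one normalized dimension of order 3, with values matching the assertions in `test_uniform_grid` earlier in this series (private helpers, subject to change):

from skopt.sampler.grid import (_create_uniform_grid_exclude_border,
                                _create_uniform_grid_include_border,
                                _create_uniform_grid_only_border)

print(_create_uniform_grid_exclude_border(1, 3))  # [[0.25], [0.5], [0.75]]
print(_create_uniform_grid_include_border(1, 3))  # [[0.],   [0.5], [1.]]
print(_create_uniform_grid_only_border(1, 3))     # endpoints only in >1-D;
                                                  # same as "include" in 1-D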
+############################################################################# +# Grid sampling +# ------------- + +grid = Grid(border="include", use_full_layout=False) +x = grid.generate(space.dimensions, n_samples) +plot_searchspace(x, 'Grid') +pdist_data.append(pdist(x).flatten()) +x_label.append("grid") + ############################################################################# # Pdist boxplot of all methods # ---------------------------- diff --git a/examples/sampler/sampling_comparison.py b/examples/sampler/sampling_comparison.py index d027ef366..819f5e06d 100644 --- a/examples/sampler/sampling_comparison.py +++ b/examples/sampler/sampling_comparison.py @@ -78,23 +78,20 @@ def plot_convergence(result_list, true_minimum=None, yscale=None, title="Converg return ax -def run(minimizer, initial_point_generator, init_point_gen_kwargs, - n_random_starts=10, n_repeats=1): - return [minimizer(func, bounds, n_random_starts=n_random_starts, +def run(minimizer, initial_point_generator, + n_initial_points=10, n_repeats=1): + return [minimizer(func, bounds, n_initial_points=n_initial_points, initial_point_generator=initial_point_generator, - init_point_gen_kwargs=init_point_gen_kwargs, n_calls=n_calls, random_state=n) for n in range(n_repeats)] -def run_measure(initial_point_generator, - n_random_starts=10, - init_point_gen_kwargs=None): +def run_measure(initial_point_generator, n_initial_points=10): start = time.time() # n_repeats must set to a much higher value to obtain meaningful results. n_repeats = 1 - res = run(gp_minimize, initial_point_generator, init_point_gen_kwargs, - n_random_starts=n_random_starts, n_repeats=n_repeats) + res = run(gp_minimize, initial_point_generator, + n_initial_points=n_initial_points, n_repeats=n_repeats) duration = time.time() - start # print("%s %s: %.2f s" % (initial_point_generator, # str(init_point_gen_kwargs), @@ -121,7 +118,7 @@ def run_measure(initial_point_generator, bounds = [(0., 1.), ] * 6 true_minimum = -3.32237 n_calls = 40 - n_random_starts = 10 + n_initial_points = 10 yscale = None title = "Convergence plot - hart6" else: @@ -129,25 +126,26 @@ def run_measure(initial_point_generator, bounds = [(-5.0, 10.0), (0.0, 15.0)] true_minimum = 0.397887 n_calls = 30 - n_random_starts = 10 + n_initial_points = 10 yscale="log" title = "Convergence plot - branin" ############################################################################# - +from skopt.utils import cook_initial_point_generator # Random search -dummy_res = run_measure( "random", n_random_starts) -lhs_res = run_measure("lhs", n_random_starts, - {"lhs_type": "classic", - "criterion": None}) -lhs2_res = run_measure("lhs", n_random_starts, - {"criterion": "maximin"}) -sobol_res = run_measure("sobol", n_random_starts, - {"randomize": False, - "min_skip": 1, "max_skip": 100}) -halton_res = run_measure("halton", n_random_starts) -hammersly_res = run_measure("hammersly", n_random_starts) +dummy_res = run_measure("random", n_initial_points) +lhs = cook_initial_point_generator( + "lhs", lhs_type="classic", criterion=None) +lhs_res = run_measure(lhs, n_initial_points) +lhs2 = cook_initial_point_generator("lhs", criterion="maximin") +lhs2_res = run_measure(lhs2, n_initial_points) +sobol = cook_initial_point_generator("sobol", randomize=False, + min_skip=1, max_skip=100) +sobol_res = run_measure(sobol, n_initial_points) +halton_res = run_measure("halton", n_initial_points) +hammersly_res = run_measure("hammersly", n_initial_points) +grid_res = run_measure("grid", n_initial_points) 
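The cooked-generator pattern above replaces the removed `init_point_gen_kwargs` dict: keyword arguments are baked into an `InitialPointGenerator` instance up front, and the instance is passed straight to the minimizer. A minimal sketch against the API introduced in this patch (toy objective and values chosen purely for illustration):

from skopt import gp_minimize
from skopt.utils import cook_initial_point_generator

# Equivalent to the old ("lhs", init_point_gen_kwargs={"criterion": "maximin"}).
lhs_maximin = cook_initial_point_generator("lhs", criterion="maximin")
res = gp_minimize(lambda x: (x[0] - 0.3) ** 2, [(0., 1.)],
                  n_calls=12, n_initial_points=8,
                  initial_point_generator=lhs_maximin, random_state=0)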
############################################################################# # Note that this can take a few minutes. @@ -157,7 +155,8 @@ def run_measure(initial_point_generator, ("lhs_maximin", lhs2_res), ("sobol", sobol_res), ("halton", halton_res), - ("hammersly", hammersly_res)], + ("hammersly", hammersly_res), + ("grid", grid_res)], true_minimum=true_minimum, yscale=yscale, title=title) @@ -172,12 +171,10 @@ def run_measure(initial_point_generator, ############################################################################# # Test with different n_random_starts values -lhs2_15_res = run_measure("lhs", 12, - {"criterion": "maximin"}) -lhs2_20_res = run_measure("lhs", 14, - {"criterion": "maximin"}) -lhs2_25_res = run_measure("lhs", 16, - {"criterion": "maximin"}) +lhs2 = cook_initial_point_generator("lhs", criterion="maximin") +lhs2_15_res = run_measure(lhs2, 12) +lhs2_20_res = run_measure(lhs2, 14) +lhs2_25_res = run_measure(lhs2, 16) ############################################################################# # n_random_starts = 10 produces the best results @@ -191,4 +188,4 @@ def run_measure(initial_point_generator, yscale=yscale, title=title) -plt.show() \ No newline at end of file +plt.show() diff --git a/pyproject.toml b/pyproject.toml index 6e5347cb2..6fad80b64 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ requires = [ "setuptools", "wheel", - "numpy>=1.11", + "numpy>=1.12", "scipy>=0.18", "scikit-learn>=0.19.1", "pyaml>=16.9", diff --git a/requirements.txt b/requirements.txt index 18985117d..5f2ef0417 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -numpy>=1.11 +numpy>=1.12 scipy>=0.18 scikit-learn>=0.19 matplotlib>=2.0.0 diff --git a/setup.py b/setup.py index 981892675..3b311924e 100644 --- a/setup.py +++ b/setup.py @@ -43,7 +43,7 @@ classifiers=CLASSIFIERS, packages=['skopt', 'skopt.learning', 'skopt.optimizer', 'skopt.space', 'skopt.learning.gaussian_process', 'skopt.sampler'], - install_requires=['joblib>=0.11', 'pyaml>=16.9', 'numpy>=1.11.0', + install_requires=['joblib>=0.11', 'pyaml>=16.9', 'numpy>=1.12.0', 'scipy>=0.18.0', 'scikit-learn>=0.19.1'], extras_require={ diff --git a/skopt/__init__.py b/skopt/__init__.py index 33c2ade38..1239ffa5d 100644 --- a/skopt/__init__.py +++ b/skopt/__init__.py @@ -65,7 +65,7 @@ "learning", "optimizer", "plots", - "samples", + "sampler", "space", "gp_minimize", "dummy_minimize", diff --git a/skopt/optimizer/base.py b/skopt/optimizer/base.py index 259dc5076..d0784ac1c 100644 --- a/skopt/optimizer/base.py +++ b/skopt/optimizer/base.py @@ -6,6 +6,7 @@ import copy import inspect +import warnings import numbers try: from collections.abc import Iterable @@ -21,13 +22,13 @@ def base_minimize(func, dimensions, base_estimator, - n_calls=100, n_random_starts=10, + n_calls=100, n_random_starts=None, + n_initial_points=10, initial_point_generator="random", acq_func="EI", acq_optimizer="lbfgs", x0=None, y0=None, random_state=None, verbose=False, callback=None, n_points=10000, n_restarts_optimizer=5, - xi=0.01, kappa=1.96, n_jobs=1, model_queue_size=None, - init_point_gen_kwargs=None): + xi=0.01, kappa=1.96, n_jobs=1, model_queue_size=None): """Base optimizer class Parameters ---------- @@ -36,7 +37,7 @@ def base_minimize(func, dimensions, base_estimator, and return the objective value. 
If you have a search-space where all dimensions have names, - then you can use `skopt.utils.use_named_args` as a decorator + then you can use :func:`skopt.utils.use_named_args` as a decorator on your objective function, in order to call it directly with the named arguments. See `use_named_args` for an example. @@ -68,9 +69,16 @@ def base_minimize(func, dimensions, base_estimator, n_random_starts : int, default=10 Number of evaluations of `func` with random points before approximating it with `base_estimator`. + .. deprecated:: 0.9 + use `n_initial_points` instead. - initial_point_generator : str, InitialPointGenerator instance, - default='random' + n_initial_points : int, default=10 + Number of evaluations of `func` with initialization points + before approximating it with `base_estimator`. Initial point + generator can be changed by setting `initial_point_generator`. + + initial_point_generator : str, InitialPointGenerator instance, \ + default='random' Sets a initial points generator. Can be either - "random" for uniform random numbers, @@ -78,6 +86,7 @@ def base_minimize(func, dimensions, base_estimator, - "halton" for a Halton sequence, - "hammersly" for a Hammersly sequence, - "lhs" for a latin hypercube sequence, + - "grid" for a uniform grid sequence acq_func : string, default=`"EI"` Function to minimize over the posterior distribution. Can be either @@ -175,9 +184,6 @@ def base_minimize(func, dimensions, base_estimator, Keeps list of models only as long as the argument given. In the case of None, the list has no capped length. - init_point_gen_kwargs : dict - Additional arguments to be passed to the initial_point_generator - Returns ------- res : `OptimizeResult`, scipy object @@ -223,20 +229,28 @@ def base_minimize(func, dimensions, base_estimator, x0 = [x0] if not isinstance(x0, list): raise ValueError("`x0` should be a list, but got %s" % type(x0)) - if n_random_starts <= 0 and not x0: - raise ValueError("Either set `n_random_starts` > 0," + + # Check `n_random_starts` deprecation first + if n_random_starts is not None: + warnings.warn(("n_random_starts will be removed in favour of " + "n_initial_points. 
It overwrites n_initial_points."), + DeprecationWarning) + n_initial_points = n_random_starts + + if n_initial_points <= 0 and not x0: + raise ValueError("Either set `n_initial_points` > 0," " or provide `x0`") # check y0: list-like, requirement of maximal calls if isinstance(y0, Iterable): y0 = list(y0) elif isinstance(y0, numbers.Number): y0 = [y0] - required_calls = n_random_starts + (len(x0) if not y0 else 0) + required_calls = n_initial_points + (len(x0) if not y0 else 0) if n_calls < required_calls: raise ValueError( "Expected `n_calls` >= %d, got %d" % (required_calls, n_calls)) # calculate the total number of initial points - n_initial_points = n_random_starts + len(x0) + n_initial_points = n_initial_points + len(x0) # Build optimizer @@ -248,8 +262,7 @@ def base_minimize(func, dimensions, base_estimator, random_state=random_state, model_queue_size=model_queue_size, acq_optimizer_kwargs=acq_optimizer_kwargs, - acq_func_kwargs=acq_func_kwargs, - init_point_gen_kwargs=init_point_gen_kwargs) + acq_func_kwargs=acq_func_kwargs) # check x0: element-wise data type, dimensionality assert all(isinstance(p, Iterable) for p in x0) if not all(len(p) == optimizer.space.n_dims for p in x0): @@ -260,7 +273,7 @@ def base_minimize(func, dimensions, base_estimator, if verbose: callbacks.append(VerboseCallback( n_init=len(x0) if not y0 else 0, - n_random=n_random_starts, + n_random=n_initial_points, n_total=n_calls)) # Record provided points diff --git a/skopt/optimizer/dummy.py b/skopt/optimizer/dummy.py index 6bf43caa9..1e432d1de 100644 --- a/skopt/optimizer/dummy.py +++ b/skopt/optimizer/dummy.py @@ -16,7 +16,7 @@ def dummy_minimize(func, dimensions, n_calls=100, and return the objective value. If you have a search-space where all dimensions have names, - then you can use `skopt.utils.use_named_args` as a decorator + then you can use :func:`skopt.utils.use_named_args` as a decorator on your objective function, in order to call it directly with the named arguments. See `use_named_args` for an example. @@ -35,8 +35,8 @@ def dummy_minimize(func, dimensions, n_calls=100, n_calls : int, default=100 Number of calls to `func` to find the minimum. - initial_point_generator : str, InitialPointGenerator instance, - default='random' + initial_point_generator : str, InitialPointGenerator instance, \ + default='random' Sets a initial points generator. Can be either - "random" for uniform random numbers, @@ -44,6 +44,7 @@ def dummy_minimize(func, dimensions, n_calls=100, - "halton" for a Halton sequence, - "hammersly" for a Hammersly sequence, - "lhs" for a latin hypercube sequence, + - "grid" for a uniform grid sequence x0 : list, list of lists or `None` Initial input points. @@ -79,9 +80,6 @@ def dummy_minimize(func, dimensions, n_calls=100, Keeps list of models only as long as the argument given. In the case of None, the list has no capped length. - init_point_gen_kwargs : dict - Additional arguments to be passed to the initial_point_generator - Returns ------- res : `OptimizeResult`, scipy object @@ -110,17 +108,16 @@ def dummy_minimize(func, dimensions, n_calls=100, # all our calls want random suggestions, except if we need to evaluate # some initial points if x0 is not None and y0 is None: - n_random_calls = n_calls - len(x0) + n_initial_points = n_calls - len(x0) else: - n_random_calls = n_calls + n_initial_points = n_calls return base_minimize(func, dimensions, base_estimator="dummy", # explicitly set optimizer to sampling as "dummy" # minimizer does not provide gradients. 
acq_optimizer="sampling", - n_calls=n_calls, n_random_starts=n_random_calls, + n_calls=n_calls, n_initial_points=n_initial_points, initial_point_generator=initial_point_generator, x0=x0, y0=y0, random_state=random_state, verbose=verbose, - callback=callback, model_queue_size=model_queue_size, - init_point_gen_kwargs=init_point_gen_kwargs) + callback=callback, model_queue_size=model_queue_size) diff --git a/skopt/optimizer/forest.py b/skopt/optimizer/forest.py index ad51ef72a..b734e3589 100644 --- a/skopt/optimizer/forest.py +++ b/skopt/optimizer/forest.py @@ -8,12 +8,11 @@ def forest_minimize(func, dimensions, base_estimator="ET", n_calls=100, - n_random_starts=10, acq_func="EI", + n_random_starts=None, n_initial_points=10, acq_func="EI", initial_point_generator="random", x0=None, y0=None, random_state=None, verbose=False, callback=None, n_points=10000, xi=0.01, kappa=1.96, - n_jobs=1, model_queue_size=None, - init_point_gen_kwargs=None): + n_jobs=1, model_queue_size=None): """Sequential optimisation using decision trees. A tree based regression model is used to model the expensive to evaluate @@ -23,13 +22,16 @@ def forest_minimize(func, dimensions, base_estimator="ET", n_calls=100, The total number of evaluations, `n_calls`, are performed like the following. If `x0` is provided but not `y0`, then the elements of `x0` - are first evaluated, followed by `n_random_starts` evaluations. - Finally, `n_calls - len(x0) - n_random_starts` evaluations are + are first evaluated, followed by `n_initial_points` evaluations. + Finally, `n_calls - len(x0) - n_initial_points` evaluations are made guided by the surrogate model. If `x0` and `y0` are both - provided then `n_random_starts` evaluations are first made then - `n_calls - n_random_starts` subsequent evaluations are made + provided then `n_initial_points` evaluations are first made then + `n_calls - n_initial_points` subsequent evaluations are made guided by the surrogate model. + The first `n_initial_points` are generated by the + `initial_point_generator`. + Parameters ---------- func : callable @@ -76,9 +78,16 @@ def forest_minimize(func, dimensions, base_estimator="ET", n_calls=100, n_random_starts : int, default=10 Number of evaluations of `func` with random points before approximating it with `base_estimator`. + .. deprecated:: 0.9 + use `n_initial_points` instead. + + n_initial_points : int, default=10 + Number of evaluations of `func` with initialization points + before approximating it with `base_estimator`. Initial point + generator can be changed by setting `initial_point_generator`. - initial_point_generator : str, InitialPointGenerator instance, - default='random' + initial_point_generator : str, InitialPointGenerator instance, \ + default='random' Sets a initial points generator. Can be either - "random" for uniform random numbers, @@ -86,6 +95,7 @@ def forest_minimize(func, dimensions, base_estimator="ET", n_calls=100, - "halton" for a Halton sequence, - "hammersly" for a Hammersly sequence, - "lhs" for a latin hypercube sequence, + - "grid" for a uniform grid sequence acq_func : string, default="LCB" Function to minimize over the forest posterior. Can be either @@ -151,9 +161,6 @@ def forest_minimize(func, dimensions, base_estimator="ET", n_calls=100, Keeps list of models only as long as the argument given. In the case of None, the list has no capped length. 
- init_point_gen_kwargs : dict - Additional arguments to be passed to the initial_point_generator - Returns ------- res : `OptimizeResult`, scipy object @@ -184,10 +191,10 @@ def forest_minimize(func, dimensions, base_estimator="ET", n_calls=100, return base_minimize(func, dimensions, base_estimator, n_calls=n_calls, n_points=n_points, n_random_starts=n_random_starts, + n_initial_points=n_initial_points, initial_point_generator=initial_point_generator, x0=x0, y0=y0, random_state=random_state, acq_func=acq_func, xi=xi, kappa=kappa, verbose=verbose, callback=callback, acq_optimizer="sampling", - model_queue_size=model_queue_size, - init_point_gen_kwargs=init_point_gen_kwargs) + model_queue_size=model_queue_size) diff --git a/skopt/optimizer/gbrt.py b/skopt/optimizer/gbrt.py index 18f51ce94..2196a4d5c 100644 --- a/skopt/optimizer/gbrt.py +++ b/skopt/optimizer/gbrt.py @@ -6,13 +6,13 @@ def gbrt_minimize(func, dimensions, base_estimator=None, - n_calls=100, n_random_starts=10, + n_calls=100, n_random_starts=None, + n_initial_points=10, initial_point_generator="random", acq_func="EI", acq_optimizer="auto", x0=None, y0=None, random_state=None, verbose=False, callback=None, n_points=10000, xi=0.01, kappa=1.96, - n_jobs=1, model_queue_size=None, - init_point_gen_kwargs=None): + n_jobs=1, model_queue_size=None): """Sequential optimization using gradient boosted trees. Gradient boosted regression trees are used to model the (very) @@ -23,13 +23,16 @@ def gbrt_minimize(func, dimensions, base_estimator=None, The total number of evaluations, `n_calls`, are performed like the following. If `x0` is provided but not `y0`, then the elements of `x0` - are first evaluated, followed by `n_random_starts` evaluations. - Finally, `n_calls - len(x0) - n_random_starts` evaluations are + are first evaluated, followed by `n_initial_points` evaluations. + Finally, `n_calls - len(x0) - n_initial_points` evaluations are made guided by the surrogate model. If `x0` and `y0` are both - provided then `n_random_starts` evaluations are first made then - `n_calls - n_random_starts` subsequent evaluations are made + provided then `n_initial_points` evaluations are first made then + `n_calls - n_initial_points` subsequent evaluations are made guided by the surrogate model. + The first `n_initial_points` are generated by the + `initial_point_generator`. + Parameters ---------- func : callable @@ -62,9 +65,16 @@ def gbrt_minimize(func, dimensions, base_estimator=None, n_random_starts : int, default=10 Number of evaluations of `func` with random points before approximating it with `base_estimator`. + .. deprecated:: 0.9 + use `n_initial_points` instead. + + n_initial_points : int, default=10 + Number of evaluations of `func` with initialization points + before approximating it with `base_estimator`. Initial point + generator can be changed by setting `initial_point_generator`. - initial_point_generator : str, InitialPointGenerator instance, - default='random' + initial_point_generator : str, InitialPointGenerator instance, \ + default='random' Sets a initial points generator. Can be either - "random" for uniform random numbers, @@ -72,6 +82,7 @@ def gbrt_minimize(func, dimensions, base_estimator=None, - "halton" for a Halton sequence, - "hammersly" for a Hammersly sequence, - "lhs" for a latin hypercube sequence, + - "grid" for a uniform grid sequence acq_func : string, default=`"LCB"` Function to minimize over the forest posterior. 
Can be either @@ -135,9 +146,6 @@ def gbrt_minimize(func, dimensions, base_estimator=None, Keeps list of models only as long as the argument given. In the case of None, the list has no capped length. - init_point_gen_kwargs : dict - Additional arguments to be passed to the initial_point_generator - Returns ------- res : `OptimizeResult`, scipy object @@ -174,9 +182,9 @@ def gbrt_minimize(func, dimensions, base_estimator=None, return base_minimize(func, dimensions, base_estimator, n_calls=n_calls, n_points=n_points, n_random_starts=n_random_starts, + n_initial_points=n_initial_points, initial_point_generator=initial_point_generator, x0=x0, y0=y0, random_state=random_state, xi=xi, kappa=kappa, acq_func=acq_func, verbose=verbose, callback=callback, acq_optimizer="sampling", - model_queue_size=model_queue_size, - init_point_gen_kwargs=init_point_gen_kwargs) + model_queue_size=model_queue_size) diff --git a/skopt/optimizer/gp.py b/skopt/optimizer/gp.py index a6091578d..dac3e3e9f 100644 --- a/skopt/optimizer/gp.py +++ b/skopt/optimizer/gp.py @@ -10,13 +10,13 @@ def gp_minimize(func, dimensions, base_estimator=None, - n_calls=100, n_random_starts=10, + n_calls=100, n_random_starts=None, + n_initial_points=10, initial_point_generator="random", acq_func="gp_hedge", acq_optimizer="auto", x0=None, y0=None, random_state=None, verbose=False, callback=None, n_points=10000, n_restarts_optimizer=5, xi=0.01, kappa=1.96, - noise="gaussian", n_jobs=1, model_queue_size=None, - init_point_gen_kwargs=None): + noise="gaussian", n_jobs=1, model_queue_size=None): """Bayesian optimization using Gaussian Processes. If every function evaluation is expensive, for instance @@ -34,13 +34,16 @@ def gp_minimize(func, dimensions, base_estimator=None, The total number of evaluations, `n_calls`, are performed like the following. If `x0` is provided but not `y0`, then the elements of `x0` - are first evaluated, followed by `n_random_starts` evaluations. - Finally, `n_calls - len(x0) - n_random_starts` evaluations are + are first evaluated, followed by `n_initial_points` evaluations. + Finally, `n_calls - len(x0) - n_initial_points` evaluations are made guided by the surrogate model. If `x0` and `y0` are both - provided then `n_random_starts` evaluations are first made then - `n_calls - n_random_starts` subsequent evaluations are made + provided then `n_initial_points` evaluations are first made then + `n_calls - n_initial_points` subsequent evaluations are made guided by the surrogate model. + The first `n_initial_points` are generated by the + `initial_point_generator`. + Parameters ---------- func : callable @@ -88,9 +91,16 @@ def gp_minimize(func, dimensions, base_estimator=None, n_random_starts : int, default=10 Number of evaluations of `func` with random points before approximating it with `base_estimator`. + .. deprecated:: 0.9 + use `n_initial_points` instead. + + n_initial_points : int, default=10 + Number of evaluations of `func` with initialization points + before approximating it with `base_estimator`. Initial point + generator can be changed by setting `initial_point_generator`. - initial_point_generator : str, InitialPointGenerator instance, - default='random' + initial_point_generator : str, InitialPointGenerator instance, \ + default='random' Sets a initial points generator. Can be either - "random" for uniform random numbers, @@ -234,9 +244,6 @@ def gp_minimize(func, dimensions, base_estimator=None, Keeps list of models only as long as the argument given. In the case of None, the list has no capped length. 
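The same rename applies to `gp_minimize`. A short sketch using the newly listed "grid" option follows; it assumes the `Grid` sampler introduced later in this patch is importable, and the argument values are illustrative:

    # Sketch only: gp_minimize with the new "grid" initial point generator.
    from skopt import gp_minimize
    from skopt.benchmarks import branin

    res = gp_minimize(branin, [(-5.0, 10.0), (0.0, 15.0)],
                      n_calls=15, n_initial_points=8,
                      initial_point_generator="grid",
                      random_state=1, noise=1e-10)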
- init_point_gen_kwargs : dict - Additional arguments to be passed to the initial_point_generator - Returns ------- res : `OptimizeResult`, scipy object @@ -281,8 +288,8 @@ def gp_minimize(func, dimensions, base_estimator=None, acq_func=acq_func, xi=xi, kappa=kappa, acq_optimizer=acq_optimizer, n_calls=n_calls, n_points=n_points, n_random_starts=n_random_starts, + n_initial_points=n_initial_points, initial_point_generator=initial_point_generator, n_restarts_optimizer=n_restarts_optimizer, x0=x0, y0=y0, random_state=rng, verbose=verbose, - callback=callback, n_jobs=n_jobs, model_queue_size=model_queue_size, - init_point_gen_kwargs=init_point_gen_kwargs) + callback=callback, n_jobs=n_jobs, model_queue_size=model_queue_size) diff --git a/skopt/optimizer/optimizer.py b/skopt/optimizer/optimizer.py index 97bb481be..bdd929f1e 100644 --- a/skopt/optimizer/optimizer.py +++ b/skopt/optimizer/optimizer.py @@ -16,7 +16,6 @@ from ..acquisition import _gaussian_acquisition from ..acquisition import gaussian_acquisition_1D from ..learning import GaussianProcessRegressor -from ..sampler import Sobol, Lhs, Hammersly, Halton from ..space import Categorical from ..space import Space from ..utils import check_x_in_space @@ -26,6 +25,7 @@ from ..utils import is_listlike from ..utils import is_2Dlistlike from ..utils import normalize_dimensions +from ..utils import cook_initial_point_generator class Optimizer(object): @@ -52,8 +52,8 @@ class Optimizer(object): - an instance of a `Dimension` object (`Real`, `Integer` or `Categorical`). - base_estimator : `"GP"`, `"RF"`, `"ET"`, `"GBRT"` or sklearn regressor, - default=`"GP"` + base_estimator : `"GP"`, `"RF"`, `"ET"`, `"GBRT"` or sklearn regressor, \ + default=`"GP"` Should inherit from :obj:`sklearn.base.RegressorMixin`. In addition the `predict` method, should have an optional `return_std` argument, which returns `std(Y | x)`` along with `E[Y | x]`. @@ -62,7 +62,7 @@ class Optimizer(object): is used in the minimize functions. n_random_starts : int, default=10 - .. deprecated:: + .. deprecated:: 0.9 use `n_initial_points` instead. n_initial_points : int, default=10 @@ -70,15 +70,16 @@ class Optimizer(object): before approximating it with `base_estimator`. Initial point generator can be changed by setting `initial_point_generator`. - initial_point_generator : str, InitialPointGenerator instance, - default='random' + initial_point_generator : str, InitialPointGenerator instance, \ + default='random' Sets a initial points generator. Can be either - "random" for uniform random numbers, - "sobol" for a Sobol sequence, - "halton" for a Halton sequence, - "hammersly" for a Hammersly sequence, - - "lhs" for a latin hypercube sequence + - "lhs" for a latin hypercube sequence, + - "grid" for a uniform grid sequence acq_func : string, default=`"gp_hedge"` Function to minimize over the posterior distribution. Can be either @@ -131,9 +132,6 @@ class Optimizer(object): acq_optimizer_kwargs : dict Additional arguments to be passed to the acquistion optimizer. - init_point_gen_kwargs : dict - Additional arguments to be passed to the initial_point_generator - model_queue_size : int or None, default=None Keeps list of models only as long as the argument given. In the case of None, the list has no capped length. 
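Because `Optimizer` now cooks the generator itself (see `cook_initial_point_generator` below), the ask/tell interface accepts the same options. A small sketch with illustrative values, not part of the patch:

    # Ask/tell sketch using the new initial point generator plumbing.
    from skopt import Optimizer
    from skopt.benchmarks import branin

    opt = Optimizer([(-5.0, 10.0), (0.0, 15.0)],
                    base_estimator="GP",
                    n_initial_points=5,
                    initial_point_generator="sobol")
    for _ in range(8):
        x = opt.ask()
        opt.tell(x, branin(x))
    print(min(opt.yi))  # best observed value so far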
@@ -161,8 +159,7 @@ def __init__(self, dimensions, base_estimator="gp", random_state=None, model_queue_size=None, acq_func_kwargs=None, - acq_optimizer_kwargs=None, - init_point_gen_kwargs=None): + acq_optimizer_kwargs=None): self.rng = check_random_state(random_state) @@ -262,29 +259,10 @@ def __init__(self, dimensions, base_estimator="gp", self.space = Space(dimensions) self._initial_samples = None - self._initial_point_generator = initial_point_generator - if init_point_gen_kwargs is None: - init_point_gen_kwargs = dict() - self.init_point_gen_kwargs = init_point_gen_kwargs - if initial_point_generator != "random" and \ - isinstance(initial_point_generator, str): - if initial_point_generator == "sobol": - self._initial_point_generator = Sobol( - **self.init_point_gen_kwargs) - elif initial_point_generator == "halton": - self._initial_point_generator = Halton( - **self.init_point_gen_kwargs) - elif initial_point_generator == "hammersly": - self._initial_point_generator = Hammersly( - **self.init_point_gen_kwargs) - elif initial_point_generator == "lhs": - self._initial_point_generator = Lhs( - **self.init_point_gen_kwargs) - else: - raise ValueError( - "Unkown initial_point_generator: " + - str(initial_point_generator) - ) + self._initial_point_generator = cook_initial_point_generator( + initial_point_generator) + + if self._initial_point_generator is not None: transformer = self.space.get_transformer() self._initial_samples = self._initial_point_generator.generate( self.space.dimensions, n_initial_points, @@ -333,8 +311,7 @@ def copy(self, random_state=None): acq_optimizer=self.acq_optimizer, acq_func_kwargs=self.acq_func_kwargs, acq_optimizer_kwargs=self.acq_optimizer_kwargs, - init_point_gen_kwargs=self.init_point_gen_kwargs, - random_state=random_state, + random_state=random_state ) optimizer._initial_samples = self._initial_samples if hasattr(self, "gains_"): diff --git a/skopt/sampler/__init__.py b/skopt/sampler/__init__.py index c8c67693c..61224ac69 100644 --- a/skopt/sampler/__init__.py +++ b/skopt/sampler/__init__.py @@ -5,9 +5,12 @@ from .sobol import Sobol from .halton import Halton from .hammersly import Hammersly +from .grid import Grid +from .base import InitialPointGenerator __all__ = [ "Lhs", "Sobol", - "Halton", "Hammersly" + "Halton", "Hammersly", + "Grid", "InitialPointGenerator" ] diff --git a/skopt/sampler/base.py b/skopt/sampler/base.py index 7811328c3..39dc6af5c 100644 --- a/skopt/sampler/base.py +++ b/skopt/sampler/base.py @@ -1,4 +1,28 @@ +from collections import defaultdict + + class InitialPointGenerator(object): def generate(self, dimensions, n_samples, random_state=None): raise NotImplemented + + def set_params(self, **params): + """ + Set the parameters of this initial point generator. + + Parameters + ---------- + **params : dict + Generator parameters. + Returns + ------- + self : object + Generator instance. 
+ """ + if not params: + # Simple optimization to gain speed (inspect is slow) + return self + for key, value in params.items(): + setattr(self, key, value) + + return self diff --git a/skopt/sampler/grid.py b/skopt/sampler/grid.py new file mode 100644 index 000000000..2e90d7a2e --- /dev/null +++ b/skopt/sampler/grid.py @@ -0,0 +1,170 @@ +""" +Inspired by https://github.com/jonathf/chaospy/blob/master/chaospy/ +distributions/sampler/sequences/grid.py +""" +import numpy as np +from .base import InitialPointGenerator +from ..space import Space +from sklearn.utils import check_random_state + + +def _quadrature_combine(args): + args = [np.asarray(arg).reshape(len(arg), -1) for arg in args] + shapes = [arg.shape for arg in args] + + size = np.prod(shapes, 0)[0] * np.sum(shapes, 0)[1] + if size > 10 ** 9: + raise MemoryError("Too large sets") + + out = args[0] + for arg in args[1:]: + out = np.hstack([ + np.tile(out, len(arg)).reshape(-1, out.shape[1]), + np.tile(arg.T, len(out)).reshape(arg.shape[1], -1).T, + ]) + return out + + +def _create_uniform_grid_exclude_border(n_dim, order): + assert order > 0 + assert n_dim > 0 + x_data = np.arange(1, order + 1) / (order + 1.) + x_data = _quadrature_combine([x_data] * n_dim) + return x_data + + +def _create_uniform_grid_include_border(n_dim, order): + assert order > 1 + assert n_dim > 0 + x_data = np.arange(0, order) / (order - 1.) + x_data = _quadrature_combine([x_data] * n_dim) + return x_data + + +def _create_uniform_grid_only_border(n_dim, order): + assert n_dim > 0 + assert order > 1 + x = [[0., 1.]] * (n_dim - 1) + x.append(list(np.arange(0, order) / (order - 1.))) + x_data = _quadrature_combine(x) + return x_data + + +class Grid(InitialPointGenerator): + """Generate samples from a regular grid. + + Parameters + ---------- + border : str, default='exclude' + defines how the samples are generated: + - 'include' : Includes the border into the grid layout + - 'exclude' : Excludes the border from the grid layout + - 'only' : Selects only points at the border of the dimension + use_full_layout : boolean, default=True + When True, a full factorial design is generated and + missing points are taken from the next larger full factorial + design, depending on `append_border` + When False, the next larger full factorial design is + generated and points are randomly selected from it. + append_border : str, default="only" + When use_full_layout is True, this parameter defines how the missing + points will be generated from the next larger grid layout: + - 'include' : Includes the border into the grid layout + - 'exclude' : Excludes the border from the grid layout + - 'only' : Selects only points at the border of the dimension + """ + + def __init__(self, border="exclude", use_full_layout=True, + append_border="only"): + self.border = border + self.use_full_layout = use_full_layout + self.append_border = append_border + + def generate(self, dimensions, n_samples, random_state=None): + """Creates samples from a regular grid. + + Parameters + ---------- + dimensions : list, shape (n_dims,) + List of search space dimensions. + Each search dimension can be defined either as + + - a `(lower_bound, upper_bound)` tuple (for `Real` or `Integer` + dimensions), + - a `(lower_bound, upper_bound, "prior")` tuple (for `Real` + dimensions), + - as a list of categories (for `Categorical` dimensions), or + - an instance of a `Dimension` object (`Real`, `Integer` or + `Categorical`). + + n_samples : int + The order of the Halton sequence. Defines the number of samples. 
+ random_state : int, RandomState instance, or None (default) + Set random state to something other than None for reproducible + results. + + Returns + ------- + np.array, shape=(n_dim, n_samples) + grid set + """ + rng = check_random_state(random_state) + space = Space(dimensions) + n_dim = space.n_dims + transformer = space.get_transformer() + space.set_transformer("normalize") + + if self.border == "include": + if self.use_full_layout: + order = int(np.floor(np.sqrt(n_samples))) + else: + order = int(np.ceil(np.sqrt(n_samples))) + if order < 2: + order = 2 + h = _create_uniform_grid_include_border(n_dim, order) + elif self.border == "exclude": + if self.use_full_layout: + order = int(np.floor(np.sqrt(n_samples))) + else: + order = int(np.ceil(np.sqrt(n_samples))) + if order < 1: + order = 1 + h = _create_uniform_grid_exclude_border(n_dim, order) + elif self.border == "only": + if self.use_full_layout: + order = int(np.floor(n_samples / 2.)) + else: + order = int(np.ceil(n_samples / 2.)) + if order < 2: + order = 2 + h = _create_uniform_grid_exclude_border(n_dim, order) + else: + raise ValueError("Wrong value for border") + if np.size(h, 0) > n_samples: + rng.shuffle(h) + h = h[:n_samples, :] + elif np.size(h, 0) < n_samples: + if self.append_border == "only": + order = int(np.ceil((n_samples - np.size(h, 0)) / 2.)) + if order < 2: + order = 2 + h2 = _create_uniform_grid_only_border(n_dim, order) + elif self.append_border == "include": + order = int(np.ceil(np.sqrt(n_samples - np.size(h, 0)))) + if order < 2: + order = 2 + h2 = _create_uniform_grid_include_border(n_dim, order) + elif self.append_border == "exclude": + order = int(np.ceil(np.sqrt(n_samples - np.size(h, 0)))) + if order < 1: + order = 1 + h2 = _create_uniform_grid_exclude_border(n_dim, order) + else: + raise ValueError("Wrong value for append_border") + h = np.vstack((h, h2[:(n_samples - np.size(h, 0))])) + rng.shuffle(h) + else: + rng.shuffle(h) + h = space.inverse_transform(h) + space.set_transformer(transformer) + return h diff --git a/skopt/sampler/halton.py b/skopt/sampler/halton.py index e8ea9fc8d..55b48ffb6 100644 --- a/skopt/sampler/halton.py +++ b/skopt/sampler/halton.py @@ -58,6 +58,7 @@ def generate(self, dimensions, n_samples, random_state=None): random_state : int, RandomState instance, or None (default) Set random state to something other than None for reproducible results. 
+ Returns ------- np.array, shape=(n_dim, n_samples) diff --git a/skopt/sampler/lhs.py b/skopt/sampler/lhs.py index 416a90770..d63a15bb3 100644 --- a/skopt/sampler/lhs.py +++ b/skopt/sampler/lhs.py @@ -6,11 +6,20 @@ import numpy as np from sklearn.utils import check_random_state from scipy import spatial -from ..utils import random_permute_matrix from ..space import Space, Categorical from .base import InitialPointGenerator +def _random_permute_matrix(h, random_state=None): + rng = check_random_state(random_state) + h_rand_perm = np.zeros_like(h) + samples, n = h.shape + for j in range(n): + order = rng.permutation(range(samples)) + h_rand_perm[:, j] = h[order, j] + return h_rand_perm + + class Lhs(InitialPointGenerator): """Latin hypercube sampling @@ -84,7 +93,8 @@ def generate(self, dimensions, n_samples, random_state=None): # Generate a random LHS h = self._lhs_normalized(n_dim, n_samples, rng) r = np.corrcoef(np.array(h).T) - if np.max(np.abs(r[r != 1])) < mincorr: + if len(np.abs(r[r != 1])) > 0 and \ + np.max(np.abs(r[r != 1])) < mincorr: mincorr = np.max(np.abs(r - np.eye(r.shape[0]))) h_opt = h.copy() h_opt = space.inverse_transform(h_opt) @@ -132,4 +142,4 @@ def _lhs_normalized(self, n_dim, n_samples, random_state): h[:, j] = u[:, j] * np.diff(x) + x[:n_samples] else: raise ValueError("Wrong lhs_type. Got ".format(self.lhs_type)) - return random_permute_matrix(h, random_state=rng) + return _random_permute_matrix(h, random_state=rng) diff --git a/skopt/tests/test_common.py b/skopt/tests/test_common.py index 9b299e43f..ac9cbdf78 100644 --- a/skopt/tests/test_common.py +++ b/skopt/tests/test_common.py @@ -113,11 +113,11 @@ def test_minimizer_api_dummy_minimize(verbose, call): @pytest.mark.parametrize("minimizer", MINIMIZERS) def test_minimizer_api(verbose, call, minimizer): n_calls = 7 - n_random_starts = 3 - n_models = n_calls - n_random_starts + 1 + n_initial_points = 3 + n_models = n_calls - n_initial_points + 1 result = minimizer(branin, [(-5.0, 10.0), (0.0, 15.0)], - n_random_starts=n_random_starts, + n_initial_points=n_initial_points, n_calls=n_calls, random_state=1, verbose=verbose, callback=call) @@ -133,10 +133,10 @@ def test_minimizer_api(verbose, call, minimizer): def test_minimizer_api_random_only(minimizer): # no models should be fit as we only evaluate at random points n_calls = 5 - n_random_starts = 5 + n_initial_points = 5 result = minimizer(branin, [(-5.0, 10.0), (0.0, 15.0)], - n_random_starts=n_random_starts, + n_initial_points=n_initial_points, n_calls=n_calls, random_state=1) @@ -150,15 +150,15 @@ def test_fixed_random_states(minimizer): # check that two runs produce exactly same results, if not there is a # random state somewhere that is not reproducible n_calls = 4 - n_random_starts = 2 + n_initial_points = 2 space = [(-5.0, 10.0), (0.0, 15.0)] result1 = minimizer(branin, space, n_calls=n_calls, - n_random_starts=n_random_starts, random_state=1) + n_initial_points=n_initial_points, random_state=1) dimensions = [(-5.0, 10.0), (0.0, 15.0)] result2 = minimizer(branin, dimensions, n_calls=n_calls, - n_random_starts=n_random_starts, random_state=1) + n_initial_points=n_initial_points, random_state=1) assert_array_almost_equal(result1.x_iters, result2.x_iters) assert_array_almost_equal(result1.func_vals, result2.func_vals) @@ -170,28 +170,28 @@ def test_minimizer_with_space(minimizer): # check we can pass a Space instance as dimensions argument and get same # result n_calls = 4 - n_random_starts = 2 + n_initial_points = 2 space = Space([(-5.0, 10.0), (0.0, 15.0)]) 
space_result = minimizer(branin, space, n_calls=n_calls, - n_random_starts=n_random_starts, random_state=1) + n_initial_points=n_initial_points, random_state=1) check_minimizer_api(space_result, n_calls) check_minimizer_bounds(space_result, n_calls) dimensions = [(-5.0, 10.0), (0.0, 15.0)] result = minimizer(branin, dimensions, n_calls=n_calls, - n_random_starts=n_random_starts, random_state=1) + n_initial_points=n_initial_points, random_state=1) assert_array_almost_equal(space_result.x_iters, result.x_iters) assert_array_almost_equal(space_result.func_vals, result.func_vals) @pytest.mark.slow_test -@pytest.mark.parametrize("n_random_starts", [0, 1, 2, 3, 4]) +@pytest.mark.parametrize("n_initial_points", [0, 1, 2, 3, 4]) @pytest.mark.parametrize("optimizer_func", [gp_minimize, forest_minimize, gbrt_minimize]) -def test_init_vals_and_models(n_random_starts, optimizer_func): +def test_init_vals_and_models(n_initial_points, optimizer_func): # test how many models are fitted when using initial points, y0 values # and random starts space = [(-5.0, 10.0), (0.0, 15.0)] @@ -199,40 +199,40 @@ def test_init_vals_and_models(n_random_starts, optimizer_func): y0 = list(map(branin, x0)) n_calls = 7 - optimizer = partial(optimizer_func, n_random_starts=n_random_starts) + optimizer = partial(optimizer_func, n_initial_points=n_initial_points) res = optimizer(branin, space, x0=x0, y0=y0, random_state=0, n_calls=n_calls) - assert_equal(len(res.models), n_calls - n_random_starts + 1) + assert_equal(len(res.models), n_calls - n_initial_points + 1) @pytest.mark.slow_test -@pytest.mark.parametrize("n_random_starts", [0, 1, 2, 3, 4]) +@pytest.mark.parametrize("n_initial_points", [0, 1, 2, 3, 4]) @pytest.mark.parametrize("optimizer_func", [gp_minimize, forest_minimize, gbrt_minimize]) -def test_init_points_and_models(n_random_starts, optimizer_func): +def test_init_points_and_models(n_initial_points, optimizer_func): # test how many models are fitted when using initial points and random # starts (no y0 in this case) space = [(-5.0, 10.0), (0.0, 15.0)] x0 = [[1, 2], [3, 4], [5, 6]] n_calls = 7 - optimizer = partial(optimizer_func, n_random_starts=n_random_starts) + optimizer = partial(optimizer_func, n_initial_points=n_initial_points) res = optimizer(branin, space, x0=x0, random_state=0, n_calls=n_calls) - assert_equal(len(res.models), n_calls - len(x0) - n_random_starts + 1) + assert_equal(len(res.models), n_calls - len(x0) - n_initial_points + 1) @pytest.mark.slow_test -@pytest.mark.parametrize("n_random_starts", [0, 5]) +@pytest.mark.parametrize("n_initial_points", [0, 5]) @pytest.mark.parametrize("optimizer_func", [gp_minimize, forest_minimize, gbrt_minimize]) -def test_init_vals(n_random_starts, optimizer_func): +def test_init_vals(n_initial_points, optimizer_func): space = [(-5.0, 10.0), (0.0, 15.0)] x0 = [[1, 2], [3, 4], [5, 6]] - n_calls = len(x0) + n_random_starts + 1 + n_calls = len(x0) + n_initial_points + 1 - optimizer = partial(optimizer_func, n_random_starts=n_random_starts) + optimizer = partial(optimizer_func, n_initial_points=n_initial_points) check_init_vals(optimizer, branin, space, x0, n_calls) @@ -247,9 +247,9 @@ def test_init_vals_dummy_minimize(): @pytest.mark.slow_test @pytest.mark.parametrize("optimizer", [ dummy_minimize, - partial(gp_minimize, n_random_starts=0), - partial(forest_minimize, n_random_starts=0), - partial(gbrt_minimize, n_random_starts=0)]) + partial(gp_minimize, n_initial_points=0), + partial(forest_minimize, n_initial_points=0), + partial(gbrt_minimize, 
n_initial_points=0)]) def test_categorical_init_vals(optimizer): space = [("-2", "-1", "0", "1", "2")] x0 = [["0"], ["1"], ["2"]] @@ -260,9 +260,9 @@ def test_categorical_init_vals(optimizer): @pytest.mark.slow_test @pytest.mark.parametrize("optimizer", [ dummy_minimize, - partial(gp_minimize, n_random_starts=0), - partial(forest_minimize, n_random_starts=0), - partial(gbrt_minimize, n_random_starts=0)]) + partial(gp_minimize, n_initial_points=0), + partial(forest_minimize, n_initial_points=0), + partial(gbrt_minimize, n_initial_points=0)]) def test_mixed_spaces(optimizer): space = [("-2", "-1", "0", "1", "2"), (-2.0, 2.0)] x0 = [["0", 2.0], ["1", 1.0], ["2", 1.0]] @@ -326,24 +326,24 @@ def test_invalid_n_calls_arguments(minimizer): with pytest.raises(ValueError): minimizer(branin, [(-5.0, 10.0), (0.0, 15.0)], - n_random_starts=0, random_state=1) + n_initial_points=0, random_state=1) - # n_calls >= n_random_starts + # n_calls >= n_initial_points with pytest.raises(ValueError): minimizer(branin, [(-5.0, 10.0), (0.0, 15.0)], - n_calls=1, n_random_starts=10, random_state=1) + n_calls=1, n_initial_points=10, random_state=1) - # n_calls >= n_random_starts + len(x0) + # n_calls >= n_initial_points + len(x0) with pytest.raises(ValueError): minimizer(branin, [(-5.0, 10.0), (0.0, 15.0)], n_calls=1, x0=[[-1, 2], [-3, 3], [2, 5]], random_state=1, - n_random_starts=7) + n_initial_points=7) - # n_calls >= n_random_starts + # n_calls >= n_initial_points with pytest.raises(ValueError): minimizer(branin, [(-5.0, 10.0), (0.0, 15.0)], n_calls=1, x0=[[-1, 2], [-3, 3], [2, 5]], y0=[2.0, 3.0, 5.0], - random_state=1, n_random_starts=7) + random_state=1, n_initial_points=7) @pytest.mark.fast_test @@ -351,7 +351,7 @@ def test_invalid_n_calls_arguments(minimizer): def test_repeated_x(minimizer): with pytest.warns(None) as record: minimizer(lambda x: x[0], dimensions=[[0, 1]], x0=[[0], [1]], - n_random_starts=0, n_calls=3) + n_initial_points=0, n_calls=3) assert len(record) > 0 w = record.pop(UserWarning) assert issubclass(w.category, UserWarning) @@ -359,7 +359,7 @@ def test_repeated_x(minimizer): with pytest.warns(None) as record: minimizer(bench4, dimensions=[("0", "1")], x0=[["0"], ["1"]], - n_calls=3, n_random_starts=0) + n_calls=3, n_initial_points=0) assert len(record) > 0 w = record.pop(UserWarning) assert issubclass(w.category, UserWarning) @@ -376,23 +376,23 @@ def test_consistent_x_iter_dimensions(minimizer): res = minimizer(bench1, dimensions=[(0, 1), (2, 3)], x0=[[0, 2], [1, 2]], n_calls=3, - n_random_starts=0) + n_initial_points=0) assert len(set(len(x) for x in res.x_iters)) == 1 assert len(res.x_iters[0]) == 2 # one dimensional problem res = minimizer(bench1, dimensions=[(0, 1)], x0=[[0], [1]], n_calls=3, - n_random_starts=0) + n_initial_points=0) assert len(set(len(x) for x in res.x_iters)) == 1 assert len(res.x_iters[0]) == 1 with pytest.raises(RuntimeError): minimizer(bench1, dimensions=[(0, 1)], - x0=[[0, 1]], n_calls=3, n_random_starts=0) + x0=[[0, 1]], n_calls=3, n_initial_points=0) with pytest.raises(RuntimeError): minimizer(bench1, dimensions=[(0, 1)], - x0=[0, 1], n_calls=3, n_random_starts=0) + x0=[0, 1], n_calls=3, n_initial_points=0) @pytest.mark.slow_test @@ -405,7 +405,7 @@ def test_early_stopping_delta_x(minimizer): dimensions=[(-1., 1.)], x0=[[-0.1], [0.1], [-0.9]], n_calls=n_calls, - n_random_starts=0, random_state=1) + n_initial_points=0, random_state=1) assert len(res.x_iters) < n_calls @@ -420,7 +420,7 @@ def test_early_stopping_delta_x_empty_result_object(minimizer): 
callback=DeltaXStopper(0.1), dimensions=[(-1., 1.)], n_calls=n_calls, - n_random_starts=1, random_state=1) + n_initial_points=1, random_state=1) assert len(res.x_iters) < n_calls @@ -433,6 +433,6 @@ def bench1_with_time(x): n_calls = 3 res = minimizer(bench1_with_time, [(-2.0, 2.0)], - acq_func=acq_func, n_calls=n_calls, n_random_starts=1, + acq_func=acq_func, n_calls=n_calls, n_initial_points=1, random_state=1) assert len(res.log_time) == n_calls diff --git a/skopt/tests/test_forest_opt.py b/skopt/tests/test_forest_opt.py index 204bee108..b0aaf2566 100644 --- a/skopt/tests/test_forest_opt.py +++ b/skopt/tests/test_forest_opt.py @@ -28,11 +28,11 @@ def test_forest_minimize_api(base_estimator): def check_minimize(minimizer, func, y_opt, dimensions, margin, - n_calls, n_random_starts=10, x0=None): + n_calls, n_initial_points=10, x0=None): for n in range(3): r = minimizer( func, dimensions, n_calls=n_calls, random_state=n, - n_random_starts=n_random_starts, x0=x0) + n_initial_points=n_initial_points, x0=x0) assert r.fun < y_opt + margin @@ -64,5 +64,5 @@ def f(params): dims = [[1]] res = forest_minimize(f, dims, n_calls=1, random_state=1, - n_random_starts=1) + n_initial_points=1) assert res.x_iters[0][0] == dims[0][0] diff --git a/skopt/tests/test_gp_opt.py b/skopt/tests/test_gp_opt.py index 8734aff5d..ffd5f356f 100644 --- a/skopt/tests/test_gp_opt.py +++ b/skopt/tests/test_gp_opt.py @@ -12,9 +12,9 @@ def check_minimize(func, y_opt, bounds, acq_optimizer, acq_func, - margin, n_calls, n_random_starts=10, init_gen="random"): + margin, n_calls, n_initial_points=10, init_gen="random"): r = gp_minimize(func, bounds, acq_optimizer=acq_optimizer, - acq_func=acq_func, n_random_starts=n_random_starts, + acq_func=acq_func, n_initial_points=n_initial_points, n_calls=n_calls, random_state=1, initial_point_generator=init_gen, noise=1e-10) @@ -72,10 +72,10 @@ def test_gp_minimize_bench4(search, acq): @pytest.mark.fast_test def test_n_jobs(): r_single = gp_minimize(bench3, [(-2.0, 2.0)], acq_optimizer="lbfgs", - acq_func="EI", n_calls=2, n_random_starts=1, + acq_func="EI", n_calls=2, n_initial_points=1, random_state=1, noise=1e-10) r_double = gp_minimize(bench3, [(-2.0, 2.0)], acq_optimizer="lbfgs", - acq_func="EI", n_calls=2, n_random_starts=1, + acq_func="EI", n_calls=2, n_initial_points=1, random_state=1, noise=1e-10, n_jobs=2) assert_array_equal(r_single.x_iters, r_double.x_iters) @@ -83,7 +83,7 @@ def test_n_jobs(): @pytest.mark.fast_test def test_gpr_default(): """Smoke test that gp_minimize does not fail for default values.""" - gp_minimize(branin, ((-5.0, 10.0), (0.0, 15.0)), n_random_starts=1, + gp_minimize(branin, ((-5.0, 10.0), (0.0, 15.0)), n_initial_points=1, n_calls=2) @@ -95,7 +95,7 @@ def test_use_given_estimator(): noise_correct = 1e+5 noise_fake = 1e-10 estimator = cook_estimator("GP", domain, noise=noise_correct) - res = gp_minimize(branin, domain, n_calls=1, n_random_starts=1, + res = gp_minimize(branin, domain, n_calls=1, n_initial_points=1, base_estimator=estimator, noise=noise_fake) assert res['models'][-1].noise == noise_correct @@ -109,7 +109,7 @@ def test_use_given_estimator_with_max_model_size(): noise_correct = 1e+5 noise_fake = 1e-10 estimator = cook_estimator("GP", domain, noise=noise_correct) - res = gp_minimize(branin, domain, n_calls=1, n_random_starts=1, + res = gp_minimize(branin, domain, n_calls=1, n_initial_points=1, base_estimator=estimator, noise=noise_fake, model_queue_size=1) assert len(res['models']) == 1 @@ -122,7 +122,7 @@ def f(params): return 0 dims = [[1]] - 
res = gp_minimize(f, dims, n_calls=1, n_random_starts=1,
+    res = gp_minimize(f, dims, n_calls=1, n_initial_points=1,
                       random_state=1)
     assert res.x_iters[0][0] == dims[0][0]
diff --git a/skopt/tests/test_sampler.py b/skopt/tests/test_sampler.py
new file mode 100644
index 000000000..e8cafb143
--- /dev/null
+++ b/skopt/tests/test_sampler.py
@@ -0,0 +1,282 @@
+import pytest
+import numbers
+import numpy as np
+import os
+import yaml
+from tempfile import NamedTemporaryFile
+
+from numpy.testing import assert_array_almost_equal
+from numpy.testing import assert_almost_equal
+from numpy.testing import assert_array_equal
+from numpy.testing import assert_equal
+from numpy.testing import assert_raises
+from scipy import spatial
+from skopt import Optimizer
+from skopt.space import Space
+from skopt.space import Real
+from skopt.space import Integer
+from skopt.space import Categorical
+from skopt.space import check_dimension as space_check_dimension
+from skopt.sampler.sobol import _bit_lo0, _bit_hi1
+from skopt.sampler.halton import _van_der_corput_samples, _create_primes
+from skopt.sampler import Hammersly, Halton, Lhs, Sobol, Grid
+from skopt.sampler import InitialPointGenerator
+from skopt.sampler.grid import _create_uniform_grid_include_border
+from skopt.sampler.grid import _create_uniform_grid_exclude_border
+from skopt.sampler.grid import _quadrature_combine
+from skopt.sampler.grid import _create_uniform_grid_only_border
+from skopt.utils import cook_initial_point_generator
+
+
+LHS_TYPE = ["classic", "centered"]
+CRITERION = ["maximin", "ratio", "correlation", None]
+SAMPLER = ["lhs", "halton", "sobol", "hammersly", "grid"]
+
+
+@pytest.mark.fast_test
+def test_lhs_centered():
+    lhs = Lhs(lhs_type="centered")
+    samples = lhs.generate([(0., 1.), ] * 3, 3)
+    assert_almost_equal(np.sum(samples), 4.5)
+
+
+@pytest.mark.parametrize("sampler", SAMPLER)
+def test_sampler(sampler):
+    s = cook_initial_point_generator(sampler)
+    samples = s.generate([(0., 1.), ] * 2, 200)
+    assert len(samples) == 200
+    assert len(samples[0]) == 2
+    assert isinstance(s, InitialPointGenerator)
+
+    samples = s.generate([("a", "b", "c")], 3)
+    assert samples[0][0] in ["a", "b", "c"]
+
+    samples = s.generate([("a", "b", "c"), (0, 1)], 1)
+    assert samples[0][0] in ["a", "b", "c"]
+    assert samples[0][1] in [0, 1]
+
+    samples = s.generate([("a", "b", "c"), (0, 1)], 3)
+    assert samples[0][0] in ["a", "b", "c"]
+    assert samples[0][1] in [0, 1]
+
+
+@pytest.mark.parametrize("lhs_type", LHS_TYPE)
+@pytest.mark.parametrize("criterion", CRITERION)
+def test_lhs_criterion(lhs_type, criterion):
+    lhs = Lhs(lhs_type=lhs_type, criterion=criterion, iterations=100)
+    samples = lhs.generate([(0., 1.), ] * 2, 200)
+    assert len(samples) == 200
+    assert len(samples[0]) == 2
+    samples = lhs.generate([("a", "b", "c")], 3)
+    assert samples[0][0] in ["a", "b", "c"]
+
+    samples = lhs.generate([("a", "b", "c"), (0, 1)], 1)
+    assert samples[0][0] in ["a", "b", "c"]
+    assert samples[0][1] in [0, 1]
+
+    samples = lhs.generate([("a", "b", "c"), (0, 1)], 3)
+    assert samples[0][0] in ["a", "b", "c"]
+    assert samples[0][1] in [0, 1]
+
+
+def test_lhs_pdist():
+    n_dim = 2
+    n_samples = 20
+    lhs = Lhs()
+
+    h = lhs._lhs_normalized(n_dim, n_samples, 0)
+    d_classic = spatial.distance.pdist(np.array(h), 'euclidean')
+    lhs = Lhs(criterion="maximin", iterations=100)
+    h = lhs.generate([(0., 1.), ] * n_dim, n_samples, random_state=0)
+    d = spatial.distance.pdist(np.array(h), 'euclidean')
+    assert np.min(d) > np.min(d_classic)
+
+
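The `test_sampler` round-trip above is the behaviour users rely on; condensed into an interactive sketch (not part of the suite, and `border` is a `Grid`-specific keyword forwarded through `set_params`):

    # Condensed version of what test_sampler exercises; illustrative only.
    from skopt.utils import cook_initial_point_generator

    gen = cook_initial_point_generator("grid", border="include")
    samples = gen.generate([(0., 1.), (0., 1.)], 8, random_state=0)
    assert len(samples) == 8 and len(samples[0]) == 2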
+@pytest.mark.parametrize("criterion", CRITERION) +def test_lhs_random_state(criterion): + n_dim = 2 + n_samples = 20 + lhs = Lhs() + + h = lhs._lhs_normalized(n_dim, n_samples, 0) + h2 = lhs._lhs_normalized(n_dim, n_samples, 0) + assert_array_equal(h, h2) + lhs = Lhs(criterion=criterion, iterations=100) + h = lhs.generate([(0., 1.), ] * n_dim, n_samples, random_state=0) + h2 = lhs.generate([(0., 1.), ] * n_dim, n_samples, random_state=0) + assert_array_equal(h, h2) + + +@pytest.mark.fast_test +def test_bit(): + X = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + res = [2, 1, 3, 1, 2, 1, 4, 1, 2, 1] + for i in range(len(X)): + assert _bit_lo0(X[i]) == res[i] + + X = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + res = [1, 2, 2, 3, 3, 3, 3, 4, 4, 4] + for i in range(len(X)): + assert _bit_hi1(X[i]) == res[i] + + +@pytest.mark.fast_test +def test_sobol(): + sobol = Sobol() + x, seed = sobol._sobol(3, 1) + assert_array_equal(x, [0.5, 0.5, 0.5]) + x, seed = sobol._sobol(3, 2) + assert_array_equal(x, [0.75, 0.25, 0.75]) + x, seed = sobol._sobol(3, 3) + assert_array_equal(x, [0.25, 0.75, 0.25]) + x, seed = sobol._sobol(3, 4) + assert_array_equal(x, [0.375, 0.375, 0.625]) + x, seed = sobol._sobol(3, 5) + assert_array_equal(x, [0.875, 0.875, 0.125]) + x, seed = sobol._sobol(3, 6) + assert_array_equal(x, [0.625, 0.125, 0.375]) + + +@pytest.mark.fast_test +def test_generate(): + sobol = Sobol(min_skip=1, max_skip=1) + x = sobol.generate([(0., 1.), ] * 3, 3) + x = np.array(x) + assert_array_equal(x[0, :], [0.5, 0.5, 0.5]) + assert_array_equal(x[1, :], [0.75, 0.25, 0.75]) + assert_array_equal(x[2, :], [0.25, 0.75, 0.25]) + + sobol.set_params(max_skip=2) + assert sobol.max_skip == 2 + assert isinstance(sobol, InitialPointGenerator) + + +@pytest.mark.fast_test +def test_van_der_corput(): + x = _van_der_corput_samples(range(11), number_base=10) + y = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.01, 0.11] + assert_array_equal(x, y) + + x = _van_der_corput_samples(range(8), number_base=2) + y = [0.5, 0.25, 0.75, 0.125, 0.625, 0.375, 0.875, 0.0625] + assert_array_equal(x, y) + + +@pytest.mark.fast_test +def test_halton(): + h = Halton() + x = h.generate([(0., 1.), ] * 2, 3) + y = np.array([[0.125, 0.625, 0.375], [0.4444, 0.7778, 0.2222]]).T + assert_array_almost_equal(x, y, 1e-3) + + h = Halton() + x = h.generate([(0., 1.), ] * 2, 4) + y = np.array([[0.125, 0.625, 0.375, 0.875], + [0.4444, 0.7778, 0.2222, 0.5556]]).T + assert_array_almost_equal(x, y, 1e-3) + + samples = h.generate([(0., 1.), ] * 2, 200) + assert len(samples) == 200 + assert len(samples[0]) == 2 + + +@pytest.mark.fast_test +def test_hammersly(): + h = Hammersly() + x = h.generate([(0., 1.), ] * 2, 3) + y = np.array([[0.75, 0.125, 0.625], [0.25, 0.5, 0.75]]).T + assert_almost_equal(x, y) + x = h.generate([(0., 1.), ] * 2, 4) + y = np.array([[0.75, 0.125, 0.625, 0.375], [0.2, 0.4, 0.6, 0.8]]).T + assert_almost_equal(x, y) + + samples = h.generate([(0., 1.), ] * 2, 200) + assert len(samples) == 200 + assert len(samples[0]) == 2 + + +@pytest.mark.fast_test +def test_primes(): + + x = _create_primes(1) + assert_array_equal(x, []) + x = _create_primes(2) + assert_array_equal(x, [2]) + x = _create_primes(3) + assert_array_equal(x, [2, 3]) + x = _create_primes(20) + assert_array_equal(x, [2, 3, 5, 7, 11, 13, 17, 19]) + + +@pytest.mark.fast_test +def test_quadrature_combine(): + a = [1, 2] + b = [[4, 4], [5, 6]] + x = [[1, 4, 4], [1, 5, 6], [2, 4, 4], [2, 5, 6]] + x_test = _quadrature_combine([a, b]) + assert_array_equal(x_test, x) + + +@pytest.mark.fast_test +def 
test_uniform_grid(): + x = _create_uniform_grid_exclude_border(1, 2) + assert_array_equal(x, [[1./3.], [2./3.]]) + x = _create_uniform_grid_include_border(1, 2) + assert_array_equal(x, [[0.], [1.]]) + x = _create_uniform_grid_only_border(1, 2) + assert_array_equal(x, [[0.], [1.]]) + + x = _create_uniform_grid_exclude_border(1, 3) + assert_array_equal(x, [[1./4.], [2./4.], [3./4.]]) + x = _create_uniform_grid_include_border(1, 3) + assert_array_equal(x, [[0./2.], [1./2.], [2./2.]]) + x = _create_uniform_grid_only_border(1, 3) + assert_array_equal(x, [[0./2.], [1./2.], [2./2.]]) + + x = _create_uniform_grid_exclude_border(1, 5) + assert_array_equal(x, [[1./6.], [2./6.], [3./6.], [4./6.], [5./6.]]) + x = _create_uniform_grid_include_border(1, 5) + assert_array_equal(x, [[0./4.], [1./4.], [2./4.], [3./4.], [4./4.]]) + x = _create_uniform_grid_only_border(1, 5) + assert_array_equal(x, [[0./4.], [1./4.], [2./4.], [3./4.], [4./4.]]) + + x = _create_uniform_grid_exclude_border(2, 2) + assert_array_equal(x, [[1. / 3., 1./3.], [1. / 3., 2. / 3.], + [2. / 3., 1. / 3.], [2. / 3., 2. / 3.]]) + x = _create_uniform_grid_include_border(2, 2) + assert_array_equal(x, [[0., 0.], [0., 1.], + [1., 0.], [1., 1.]]) + x = _create_uniform_grid_only_border(2, 3) + assert_array_equal(x, [[0., 0.], [0., 0.5], + [0., 1.], [1., 0.], + [1., 0.5], [1., 1.]]) + + assert_raises(AssertionError, _create_uniform_grid_exclude_border, 1, 0) + assert_raises(AssertionError, _create_uniform_grid_exclude_border, 0, 1) + assert_raises(AssertionError, _create_uniform_grid_include_border, 1, 0) + assert_raises(AssertionError, _create_uniform_grid_include_border, 0, 1) + assert_raises(AssertionError, _create_uniform_grid_only_border, 1, 1) + assert_raises(AssertionError, _create_uniform_grid_only_border, 0, 2) + + +@pytest.mark.fast_test +def test_grid(): + grid = Grid() + samples = grid.generate([(0., 1.), ] * 2, 200) + assert len(samples) == 200 + assert len(samples[0]) == 2 + + grid = Grid(border="include") + samples = grid.generate([(0., 1.), ] * 2, 200) + assert len(samples) == 200 + assert len(samples[0]) == 2 + + grid = Grid(use_full_layout=False) + samples = grid.generate([(0., 1.), ] * 2, 200) + assert len(samples) == 200 + assert len(samples[0]) == 2 + + grid = Grid(use_full_layout=True, append_border="include") + samples = grid.generate([(0., 1.), ] * 2, 200) + assert len(samples) == 200 + assert len(samples[0]) == 2 diff --git a/skopt/tests/test_samples.py b/skopt/tests/test_samples.py deleted file mode 100644 index bae6a1617..000000000 --- a/skopt/tests/test_samples.py +++ /dev/null @@ -1,176 +0,0 @@ -import pytest -import numbers -import numpy as np -import os -import yaml -from tempfile import NamedTemporaryFile - -from numpy.testing import assert_array_almost_equal -from numpy.testing import assert_almost_equal -from numpy.testing import assert_array_equal -from numpy.testing import assert_equal -from numpy.testing import assert_raises_regex -from scipy import spatial -from skopt import Optimizer -from skopt.space import Space -from skopt.space import Real -from skopt.space import Integer -from skopt.space import Categorical -from skopt.space import check_dimension as space_check_dimension -from skopt.sampler.sobol import _bit_lo0, _bit_hi1 -from skopt.sampler.halton import _van_der_corput_samples, _create_primes -from skopt.sampler import Hammersly, Halton, Lhs, Sobol - - -@pytest.mark.fast_test -def test_lhs_type(): - lhs = Lhs(lhs_type="classic") - samples = lhs.generate([(0., 1.), ] * 2, 200) - assert len(samples) 
== 200 - assert len(samples[0]) == 2 - lhs = Lhs(lhs_type="centered") - samples = lhs.generate([(0., 1.), ] * 3, 3) - assert_almost_equal(np.sum(samples), 4.5) - samples = lhs.generate([("a", "b", "c")], 3) - assert samples[0][0] in ["a", "b", "c"] - - samples = lhs.generate([("a", "b", "c"), (0, 1)], 1) - assert samples[0][0] in ["a", "b", "c"] - assert samples[0][1] in [0, 1] - - samples = lhs.generate([("a", "b", "c"), (0, 1)], 3) - assert samples[0][0] in ["a", "b", "c"] - assert samples[0][1] in [0, 1] - - -def test_lhs_criterion(): - for criterion in ["maximin", "ratio", "correlation"]: - lhs = Lhs(criterion=criterion, iterations=100) - samples = lhs.generate([(0., 1.), ] * 2, 200) - assert len(samples) == 200 - assert len(samples[0]) == 2 - - -def test_lhs_pdist(): - n_dim = 2 - n_samples = 20 - lhs = Lhs() - - h = lhs._lhs_normalized(n_dim, n_samples, 0) - d_classic = spatial.distance.pdist(np.array(h), 'euclidean') - lhs = Lhs(criterion="maximin", iterations=100) - h = lhs.generate([(0., 1.), ] * n_dim, n_samples, random_state=0) - d = spatial.distance.pdist(np.array(h), 'euclidean') - assert np.min(d) > np.min(d_classic) - - -def test_lhs_random_state(): - n_dim = 2 - n_samples = 20 - lhs = Lhs() - - h = lhs._lhs_normalized(n_dim, n_samples, 0) - h2 = lhs._lhs_normalized(n_dim, n_samples, 0) - assert_array_equal(h, h2) - for criterion in ["maximin", "ratio", "correlation"]: - lhs = Lhs(criterion=criterion, iterations=100) - h = lhs.generate([(0., 1.), ] * n_dim, n_samples, random_state=0) - h2 = lhs.generate([(0., 1.), ] * n_dim, n_samples, random_state=0) - assert_array_equal(h, h2) - - -@pytest.mark.fast_test -def test_bit(): - X = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - res = [2, 1, 3, 1, 2, 1, 4, 1, 2, 1] - for i in range(len(X)): - assert _bit_lo0(X[i]) == res[i] - - X = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - res = [1, 2, 2, 3, 3, 3, 3, 4, 4, 4] - for i in range(len(X)): - assert _bit_hi1(X[i]) == res[i] - - -@pytest.mark.fast_test -def test_sobol(): - sobol = Sobol() - x, seed = sobol._sobol(3, 1) - assert_array_equal(x, [0.5, 0.5, 0.5]) - x, seed = sobol._sobol(3, 2) - assert_array_equal(x, [0.75, 0.25, 0.75]) - x, seed = sobol._sobol(3, 3) - assert_array_equal(x, [0.25, 0.75, 0.25]) - x, seed = sobol._sobol(3, 4) - assert_array_equal(x, [0.375, 0.375, 0.625]) - x, seed = sobol._sobol(3, 5) - assert_array_equal(x, [0.875, 0.875, 0.125]) - x, seed = sobol._sobol(3, 6) - assert_array_equal(x, [0.625, 0.125, 0.375]) - - -@pytest.mark.fast_test -def test_generate(): - sobol = Sobol(min_skip=1, max_skip=1) - x = sobol.generate([(0., 1.), ] * 3, 3) - x = np.array(x) - assert_array_equal(x[0, :], [0.5, 0.5, 0.5]) - assert_array_equal(x[1, :], [0.75, 0.25, 0.75]) - assert_array_equal(x[2, :], [0.25, 0.75, 0.25]) - - -@pytest.mark.fast_test -def test_van_der_corput(): - x = _van_der_corput_samples(range(11), number_base=10) - y = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.01, 0.11] - assert_array_equal(x, y) - - x = _van_der_corput_samples(range(8), number_base=2) - y = [0.5, 0.25, 0.75, 0.125, 0.625, 0.375, 0.875, 0.0625] - assert_array_equal(x, y) - - -@pytest.mark.fast_test -def test_halton(): - h = Halton() - x = h.generate([(0., 1.), ] * 2, 3) - y = np.array([[0.125, 0.625, 0.375], [0.4444, 0.7778, 0.2222]]).T - assert_array_almost_equal(x, y, 1e-3) - - h = Halton() - x = h.generate([(0., 1.), ] * 2, 4) - y = np.array([[0.125, 0.625, 0.375, 0.875], - [0.4444, 0.7778, 0.2222, 0.5556]]).T - assert_array_almost_equal(x, y, 1e-3) - - samples = h.generate([(0., 1.), ] * 2, 200) - assert 
len(samples) == 200 - assert len(samples[0]) == 2 - - -@pytest.mark.fast_test -def test_hammersly(): - h = Hammersly() - x = h.generate([(0., 1.), ] * 2, 3) - y = np.array([[0.75, 0.125, 0.625], [0.25, 0.5, 0.75]]).T - assert_almost_equal(x, y) - x = h.generate([(0., 1.), ] * 2, 4) - y = np.array([[0.75, 0.125, 0.625, 0.375], [0.2, 0.4, 0.6, 0.8]]).T - assert_almost_equal(x, y) - - samples = h.generate([(0., 1.), ] * 2, 200) - assert len(samples) == 200 - assert len(samples[0]) == 2 - - -@pytest.mark.fast_test -def test_primes(): - - x = _create_primes(1) - assert_array_equal(x, []) - x = _create_primes(2) - assert_array_equal(x, [2]) - x = _create_primes(3) - assert_array_equal(x, [2, 3]) - x = _create_primes(20) - assert_array_equal(x, [2, 3, 5, 7, 11, 13, 17, 19]) diff --git a/skopt/utils.py b/skopt/utils.py index 395413acf..6c769d731 100644 --- a/skopt/utils.py +++ b/skopt/utils.py @@ -16,7 +16,8 @@ from .learning.gaussian_process.kernels import ConstantKernel from .learning.gaussian_process.kernels import HammingKernel from .learning.gaussian_process.kernels import Matern - +from .sampler import Sobol, Lhs, Hammersly, Halton, Grid +from .sampler import InitialPointGenerator from .space import Space, Categorical, Integer, Real, Dimension @@ -342,7 +343,7 @@ def cook_estimator(base_estimator, space=None, **kwargs): Parameters ---------- base_estimator : "GP", "RF", "ET", "GBRT", "DUMMY" - or sklearn regressor, default="GP" + or sklearn regressor Should inherit from `sklearn.base.RegressorMixin`. In addition the `predict` method should have an optional `return_std` argument, which returns `std(Y | x)`` along with `E[Y | x]`. @@ -406,6 +407,52 @@ def cook_estimator(base_estimator, space=None, **kwargs): return base_estimator +def cook_initial_point_generator(generator, **kwargs): + """ + Cook a default initial point generator. + + For the special generator called "random" the return value is None. + + Parameters + ---------- + generator : "lhs", "sobol", "halton", "hammersly", "grid", "random" + or InitialPointGenerator instance" + Should inherit from `skopt.sampler.InitialPointGenerator`. + + kwargs : dict + Extra parameters provided to the generator at init time. + """ + if generator is None: + generator = "random" + elif isinstance(generator, str): + generator = generator.lower() + if generator not in ["sobol", "halton", "hammersly", "lhs", "random", + "grid"]: + raise ValueError("Valid strings for the generator parameter " + " are: 'sobol', 'lhs', 'halton', 'hammersly'," + "'random', or 'grid' not " + "%s." % generator) + elif not isinstance(generator, InitialPointGenerator): + raise ValueError("generator has to be an InitialPointGenerator." + "Got %s" % (str(type(generator)))) + + if isinstance(generator, str): + if generator == "sobol": + generator = Sobol() + elif generator == "halton": + generator = Halton() + elif generator == "hammersly": + generator = Hammersly() + elif generator == "lhs": + generator = Lhs() + elif generator == "grid": + generator = Grid() + elif generator == "random": + return None + generator.set_params(**kwargs) + return generator + + def dimensions_aslist(search_space): """Convert a dict representation of a search space into a list of dimensions, ordered by sorted(search_space.keys()). 
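A quick behavioural sketch of the helper defined above: string names are cooked into sampler instances with any `kwargs` applied through `set_params`, while "random" deliberately returns None so the caller falls back to uniform random sampling. Illustrative only:

    # Semantics of cook_initial_point_generator, per the code above.
    from skopt.utils import cook_initial_point_generator
    from skopt.sampler import Lhs

    assert cook_initial_point_generator("random") is None
    lhs = cook_initial_point_generator("lhs", lhs_type="centered")
    assert isinstance(lhs, Lhs) and lhs.lhs_type == "centered"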
@@ -768,13 +815,3 @@ def wrapper(x): return wrapper return decorator - - -def random_permute_matrix(h, random_state=None): - rng = check_random_state(random_state) - h_rand_perm = np.zeros_like(h) - samples, n = h.shape - for j in range(n): - order = rng.permutation(range(samples)) - h_rand_perm[:, j] = h[order, j] - return h_rand_perm From 03966135ae56072d0364abcda61c5a821591f21a Mon Sep 17 00:00:00 2001 From: holgern Date: Thu, 20 Feb 2020 20:59:31 +0100 Subject: [PATCH 102/265] Add note that the script were copied from scikit-learn --- .circleci/config.yml | 2 +- build_tools/circle/build_doc.sh | 2 +- build_tools/circle/build_test_pypy.sh | 2 +- build_tools/circle/linting.sh | 2 +- build_tools/circle/list_versions.sh | 2 +- build_tools/circle/push_doc.sh | 1 + 6 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index c068ac73c..a0a49700c 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,5 +1,5 @@ version: 2 - +# # Copied from https://github.com/scikit-learn/scikit-learn/blob/master/.circleci/config.yml jobs: build: docker: diff --git a/build_tools/circle/build_doc.sh b/build_tools/circle/build_doc.sh index 5d9dfd177..6e8c5997c 100755 --- a/build_tools/circle/build_doc.sh +++ b/build_tools/circle/build_doc.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -x set -e - +# Copied from https://github.com/scikit-learn/scikit-learn/blob/master/build_tools/circle/build_doc.sh # Decide what kind of documentation build to run, and run it. # # If the last commit message has a "[doc skip]" marker, do not build diff --git a/build_tools/circle/build_test_pypy.sh b/build_tools/circle/build_test_pypy.sh index 2d7264af6..a08967423 100644 --- a/build_tools/circle/build_test_pypy.sh +++ b/build_tools/circle/build_test_pypy.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -x set -e - +# Copied from https://github.com/scikit-learn/scikit-learn/blob/master/build_tools/circle/build_test_pypy.sh apt-get -yq update apt-get -yq install libatlas-base-dev liblapack-dev gfortran ccache libopenblas-dev diff --git a/build_tools/circle/linting.sh b/build_tools/circle/linting.sh index 055d1b696..5b6f99765 100755 --- a/build_tools/circle/linting.sh +++ b/build_tools/circle/linting.sh @@ -1,5 +1,5 @@ #!/bin/bash - +# Copied from https://github.com/scikit-learn/scikit-learn/blob/master/build_tools/circle/linting.sh # This script is used in CircleCI to check that PRs do not add obvious # flake8 violations. It relies on two things: # - find common ancestor between branch and diff --git a/build_tools/circle/list_versions.sh b/build_tools/circle/list_versions.sh index dcafbfb40..3343160ee 100755 --- a/build_tools/circle/list_versions.sh +++ b/build_tools/circle/list_versions.sh @@ -1,5 +1,5 @@ #!/usr/bin/env python3 - +# Copied from https://github.com/scikit-learn/scikit-learn/blob/master/build_tools/circle/list_versions.sh # List all available versions of the documentation import json import re diff --git a/build_tools/circle/push_doc.sh b/build_tools/circle/push_doc.sh index 9c6d6cc95..e85d04e65 100755 --- a/build_tools/circle/push_doc.sh +++ b/build_tools/circle/push_doc.sh @@ -1,4 +1,5 @@ #!/bin/bash +# Copied from https://github.com/scikit-learn/scikit-learn/blob/master/build_tools/circle/push_doc.sh # This script is meant to be called in the "deploy" step defined in # circle.yml. See https://circleci.com/docs/ for more details. 
# The behavior of the script is controlled by environment variable defined From 3374e7f2256ac125909aa14759a80c4b153c0e4a Mon Sep 17 00:00:00 2001 From: holgern Date: Thu, 20 Feb 2020 21:27:58 +0100 Subject: [PATCH 103/265] Add license information --- build_tools/circle/build_doc.sh | 3 +++ build_tools/circle/build_test_pypy.sh | 2 ++ build_tools/circle/checkout_merge_commit.sh | 4 +++- build_tools/circle/deploy.sh | 1 + build_tools/circle/linting.sh | 3 +++ build_tools/circle/list_versions.sh | 2 ++ build_tools/circle/push_doc.sh | 3 +++ build_tools/travis/test_script.sh | 5 +++++ doc/modules/plots.rst | 4 ++-- 9 files changed, 24 insertions(+), 3 deletions(-) diff --git a/build_tools/circle/build_doc.sh b/build_tools/circle/build_doc.sh index 6e8c5997c..8649463cd 100755 --- a/build_tools/circle/build_doc.sh +++ b/build_tools/circle/build_doc.sh @@ -2,6 +2,9 @@ set -x set -e # Copied from https://github.com/scikit-learn/scikit-learn/blob/master/build_tools/circle/build_doc.sh +# The scikit-learn developers. +# License: BSD-style +# # Decide what kind of documentation build to run, and run it. # # If the last commit message has a "[doc skip]" marker, do not build diff --git a/build_tools/circle/build_test_pypy.sh b/build_tools/circle/build_test_pypy.sh index a08967423..a7a86eb9a 100644 --- a/build_tools/circle/build_test_pypy.sh +++ b/build_tools/circle/build_test_pypy.sh @@ -2,6 +2,8 @@ set -x set -e # Copied from https://github.com/scikit-learn/scikit-learn/blob/master/build_tools/circle/build_test_pypy.sh +# The scikit-learn developers. +# License: BSD-style apt-get -yq update apt-get -yq install libatlas-base-dev liblapack-dev gfortran ccache libopenblas-dev diff --git a/build_tools/circle/checkout_merge_commit.sh b/build_tools/circle/checkout_merge_commit.sh index edbc679bd..73947081b 100755 --- a/build_tools/circle/checkout_merge_commit.sh +++ b/build_tools/circle/checkout_merge_commit.sh @@ -1,5 +1,7 @@ #!/bin/bash - +# Copied from https://github.com/scikit-learn/scikit-learn/blob/master/build_tools/circle/checkout_merge_commit.sh +# The scikit-learn developers. +# License: BSD-style # Add `master` branch to the update list. # Otherwise CircleCI will give us a cached one. diff --git a/build_tools/circle/deploy.sh b/build_tools/circle/deploy.sh index 824f7ba23..c527e77b4 100644 --- a/build_tools/circle/deploy.sh +++ b/build_tools/circle/deploy.sh @@ -1,5 +1,6 @@ #!/bin/bash # Almost copied verbatim from https://github.com/scikit-learn/scikit-learn/blob/master/build_tools/circle/push_doc.sh +# export SKOPT_HOME=$(pwd) if [ -z $CIRCLE_PROJECT_USERNAME ]; diff --git a/build_tools/circle/linting.sh b/build_tools/circle/linting.sh index 5b6f99765..bdeeaa0cd 100755 --- a/build_tools/circle/linting.sh +++ b/build_tools/circle/linting.sh @@ -1,5 +1,8 @@ #!/bin/bash # Copied from https://github.com/scikit-learn/scikit-learn/blob/master/build_tools/circle/linting.sh +# The scikit-learn developers. +# License: BSD-style +# # This script is used in CircleCI to check that PRs do not add obvious # flake8 violations. It relies on two things: # - find common ancestor between branch and diff --git a/build_tools/circle/list_versions.sh b/build_tools/circle/list_versions.sh index 3343160ee..a8b6c2d5d 100755 --- a/build_tools/circle/list_versions.sh +++ b/build_tools/circle/list_versions.sh @@ -1,5 +1,7 @@ #!/usr/bin/env python3 # Copied from https://github.com/scikit-learn/scikit-learn/blob/master/build_tools/circle/list_versions.sh +# The scikit-learn developers. 
+# License: BSD-style # List all available versions of the documentation import json import re diff --git a/build_tools/circle/push_doc.sh b/build_tools/circle/push_doc.sh index e85d04e65..1c8eae252 100755 --- a/build_tools/circle/push_doc.sh +++ b/build_tools/circle/push_doc.sh @@ -1,5 +1,8 @@ #!/bin/bash # Copied from https://github.com/scikit-learn/scikit-learn/blob/master/build_tools/circle/push_doc.sh +# The scikit-learn developers. +# License: BSD-style +# # This script is meant to be called in the "deploy" step defined in # circle.yml. See https://circleci.com/docs/ for more details. # The behavior of the script is controlled by environment variable defined diff --git a/build_tools/travis/test_script.sh b/build_tools/travis/test_script.sh index b5c05b033..c38e5fa0c 100644 --- a/build_tools/travis/test_script.sh +++ b/build_tools/travis/test_script.sh @@ -1,4 +1,9 @@ #!/bin/bash +# +# Copied from https://github.com/scikit-learn/scikit-learn/blob/master/build_tools/travis/test_script.sh +# The scikit-learn developers. +# License: BSD-style +# # This script is meant to be called by the "script" step defined in # .travis.yml. See https://docs.travis-ci.com/ for more details. # The behavior of the script is controlled by environment variabled defined diff --git a/doc/modules/plots.rst b/doc/modules/plots.rst index 7762764e6..77f8b3ee0 100644 --- a/doc/modules/plots.rst +++ b/doc/modules/plots.rst @@ -21,7 +21,7 @@ plot_evaluations ================ :class:`plot_evaluations` visualize the order in which points where sampled. -.. figure:: ../auto_examples/images/sphx_glr_visualizing-results_002.png +.. figure:: ../auto_examples/plots/images/sphx_glr_visualizing-results_002.png :target: ../auto_examples/plots/visualizing-results.htm :align: center @@ -29,7 +29,7 @@ plot_objective ============== :class:`plot_objective` creates pairwise dependence plot of the objective function. -.. figure:: ../auto_examples/images/sphx_glr_partial-dependence-plot_001.png +.. figure:: ../auto_examples/plots/images/sphx_glr_partial-dependence-plot_001.png :target: ../auto_examples/plots/partial-dependence-plot.html :align: center From 14ff0ae03e355ea3186c84616d3af83a80d8bd6c Mon Sep 17 00:00:00 2001 From: holgern Date: Thu, 20 Feb 2020 22:10:51 +0100 Subject: [PATCH 104/265] Fix pdf creation and reduce example build time --- examples/plots/visualizing-results.py | 4 ++-- skopt/benchmarks.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/plots/visualizing-results.py b/examples/plots/visualizing-results.py index 40c0e8aa8..88cb0e0f4 100644 --- a/examples/plots/visualizing-results.py +++ b/examples/plots/visualizing-results.py @@ -199,7 +199,7 @@ def plot_branin(): ############################################################################# _ = plot_evaluations(forest_res) -_ = plot_objective(forest_res) +_ = plot_objective(forest_res, n_samples=40) ############################################################################# # Going from 6 to 6+2 dimensions @@ -218,6 +218,6 @@ def plot_branin(): base_estimator="ET", random_state=4) _ = plot_evaluations(forest_res) -_ = plot_objective(forest_res) +_ = plot_objective(forest_res, n_samples=40) # .. [Friedman (2001)] `doi:10.1214/aos/1013203451 section 8.2 ` diff --git a/skopt/benchmarks.py b/skopt/benchmarks.py index 0eacd76b0..02741e2e9 100644 --- a/skopt/benchmarks.py +++ b/skopt/benchmarks.py @@ -68,7 +68,8 @@ def bench5(x): def branin(x, a=1, b=5.1 / (4 * np.pi**2), c=5. / np.pi, r=6, s=10, t=1. 
/ (8 * np.pi)): - """Branin-Hoo function is defined on the square x1 ∈ [-5, 10], x2 ∈ [0, 15]. + """Branin-Hoo function is defined on the square + :math:`x1 \in [-5, 10], x2 \in [0, 15]`. It has three minima with f(x*) = 0.397887 at x* = (-pi, 12.275), (+pi, 2.275), and (9.42478, 2.475). From aaa125702bdede1d25b30888d31a8191975a5de4 Mon Sep 17 00:00:00 2001 From: holgern Date: Thu, 20 Feb 2020 22:20:50 +0100 Subject: [PATCH 105/265] Adapt flake8 --- setup.cfg | 3 +++ 1 file changed, 3 insertions(+) diff --git a/setup.cfg b/setup.cfg index 5e9733634..82f184d34 100644 --- a/setup.cfg +++ b/setup.cfg @@ -27,3 +27,6 @@ universal = 1 [coverage:run] omit = */tests/* +[flake8] +# Default flake8 3.5 ignored flags +ignore=E121,E123,E126,E226,E24,E704,W503,W504 \ No newline at end of file From f94467c652f5e9c800fb904e5446145e1ba4b2e0 Mon Sep 17 00:00:00 2001 From: holgern Date: Thu, 20 Feb 2020 22:26:29 +0100 Subject: [PATCH 106/265] Fix escape sign --- skopt/benchmarks.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/skopt/benchmarks.py b/skopt/benchmarks.py index 02741e2e9..031e9994f 100644 --- a/skopt/benchmarks.py +++ b/skopt/benchmarks.py @@ -66,10 +66,10 @@ def bench5(x): return float(x[0]) ** 2 + x[1] ** 2 -def branin(x, a=1, b=5.1 / (4 * np.pi**2), c=5. / np.pi, +def branin(x, a=1, b=5.1 / (4 * np.pi ** 2), c=5. / np.pi, r=6, s=10, t=1. / (8 * np.pi)): """Branin-Hoo function is defined on the square - :math:`x1 \in [-5, 10], x2 \in [0, 15]`. + :math:`x1 \\in [-5, 10], x2 \\in [0, 15]`. It has three minima with f(x*) = 0.397887 at x* = (-pi, 12.275), (+pi, 2.275), and (9.42478, 2.475). @@ -82,10 +82,10 @@ def branin(x, a=1, b=5.1 / (4 * np.pi**2), c=5. / np.pi, def hart6(x, alpha=np.asarray([1.0, 1.2, 3.0, 3.2]), - P=10**-4 * np.asarray([[1312, 1696, 5569, 124, 8283, 5886], - [2329, 4135, 8307, 3736, 1004, 9991], - [2348, 1451, 3522, 2883, 3047, 6650], - [4047, 8828, 8732, 5743, 1091, 381]]), + P=10 ** -4 * np.asarray([[1312, 1696, 5569, 124, 8283, 5886], + [2329, 4135, 8307, 3736, 1004, 9991], + [2348, 1451, 3522, 2883, 3047, 6650], + [4047, 8828, 8732, 5743, 1091, 381]]), A=np.asarray([[10, 3, 17, 3.50, 1.7, 8], [0.05, 10, 17, 0.1, 8, 14], [3, 3.5, 1.7, 10, 17, 8], @@ -97,4 +97,4 @@ def hart6(x, More details: """ - return -np.sum(alpha * np.exp(-np.sum(A * (np.array(x) - P)**2, axis=1))) + return -np.sum(alpha * np.exp(-np.sum(A * (np.array(x) - P) ** 2, axis=1))) From ae1a3efd6a0ab382a9d2a37b87007b53f718cb18 Mon Sep 17 00:00:00 2001 From: holgern Date: Thu, 20 Feb 2020 23:54:25 +0100 Subject: [PATCH 107/265] Fix ending and file for doc generation --- build_tools/circle/{list_versions.sh => list_versions.py} | 0 doc/Makefile | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename build_tools/circle/{list_versions.sh => list_versions.py} (100%) mode change 100755 => 100644 diff --git a/build_tools/circle/list_versions.sh b/build_tools/circle/list_versions.py old mode 100755 new mode 100644 similarity index 100% rename from build_tools/circle/list_versions.sh rename to build_tools/circle/list_versions.py diff --git a/doc/Makefile b/doc/Makefile index 8ae7c8497..73e661410 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -107,4 +107,4 @@ optipng: | xargs -0 -n 1 -P 4 optipng -o10 dist: html latexpdf - cp _build/latex/user_guide.pdf _build/html/stable/_downloads/scikit-optimize-docs.pdf + cp _build/latex/scikit-optimize.pdf _build/html/stable/_downloads/scikit-optimize-docs.pdf From 6ed2c4d44beef6e19ae5f7d57ae1b80fd5433813 Mon Sep 17 00:00:00 2001 From: 
holgern Date: Thu, 20 Feb 2020 23:56:13 +0100 Subject: [PATCH 108/265] Fix pep8 --- build_tools/circle/list_versions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build_tools/circle/list_versions.py b/build_tools/circle/list_versions.py index a8b6c2d5d..a1e213c45 100644 --- a/build_tools/circle/list_versions.py +++ b/build_tools/circle/list_versions.py @@ -96,4 +96,4 @@ def get_pdf_size(version): if pdf_size is not None: out += (' (`PDF %s <%s/_downloads/scikit-optimize-docs.pdf>`_)' % (pdf_size, path)) - print(out) \ No newline at end of file + print(out) From 31341b6012585d509ea22608f71218fbc18a4ba5 Mon Sep 17 00:00:00 2001 From: holgern Date: Thu, 20 Feb 2020 23:59:41 +0100 Subject: [PATCH 109/265] Fix pep8 --- build_tools/circle/list_versions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/build_tools/circle/list_versions.py b/build_tools/circle/list_versions.py index a1e213c45..b745d14d8 100644 --- a/build_tools/circle/list_versions.py +++ b/build_tools/circle/list_versions.py @@ -10,6 +10,7 @@ from distutils.version import LooseVersion from urllib.request import urlopen + def json_urlread(url): try: return json.loads(urlopen(url).read().decode('utf8')) From 28f14d7220d4c6a7c539bee48f54d9074b6d9e1f Mon Sep 17 00:00:00 2001 From: holgern Date: Fri, 21 Feb 2020 00:01:40 +0100 Subject: [PATCH 110/265] Fix pep8 --- build_tools/circle/list_versions.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/build_tools/circle/list_versions.py b/build_tools/circle/list_versions.py index b745d14d8..63c7ab7ad 100644 --- a/build_tools/circle/list_versions.py +++ b/build_tools/circle/list_versions.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 -# Copied from https://github.com/scikit-learn/scikit-learn/blob/master/build_tools/circle/list_versions.sh +# Copied from https://github.com/scikit-learn/scikit-learn/blob/master/ +# build_tools/circle/list_versions.sh # The scikit-learn developers. # License: BSD-style # List all available versions of the documentation @@ -20,7 +21,8 @@ def json_urlread(url): def human_readable_data_quantity(quantity, multiple=1024): - # https://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size + # https://stackoverflow.com/questions/1094841/ + # reusable-library-to-get-human-readable-version-of-file-size if quantity == 0: quantity = +0 SUFFIXES = ["B"] + [i + {1000: "B", 1024: "iB"}[multiple] From b9de36a192f9e01826aa2ffcbb9040133b71f740 Mon Sep 17 00:00:00 2001 From: holgern Date: Fri, 21 Feb 2020 00:07:36 +0100 Subject: [PATCH 111/265] Add link to other versions --- doc/themes/scikit-learn-modern/nav.html | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/themes/scikit-learn-modern/nav.html b/doc/themes/scikit-learn-modern/nav.html index 5f908d94e..3c1109ecf 100644 --- a/doc/themes/scikit-learn-modern/nav.html +++ b/doc/themes/scikit-learn-modern/nav.html @@ -9,7 +9,8 @@ {%- set drop_down_navigation = [ ('Getting Started', pathto('getting_started')), ('Development', pathto('development')), - ('GitHub', 'https://github.com/scikit-optimize/scikit-optimize')] + ('GitHub', 'https://github.com/scikit-optimize/scikit-optimize'), + ('Other Versions', 'https://scikit-optimize.github.io/dev/versions.html')] -%}
@@ -108,8 +108,8 @@

    News

  • On-going development: What's new (Changelog)
-  • Feb 2020. scikit-optimize 0.7.2 (Changelog).
-  • Feb 2020. scikit-optimize 0.7.1 (Changelog).
+  • Feb 2020. scikit-optimize 0.7.2 (Changelog).
+  • Feb 2020. scikit-optimize 0.7.1 (Changelog).
  • Jan 2020. scikit-optimize 0.7 (Changelog).
  • April 2018. scikit-optimize 0.6 (Changelog).
  • Mar 2018. scikit-optimize 0.5 (Changelog). diff --git a/doc/whats_new/v0.7.rst b/doc/whats_new/v0.7.rst index 7eb0170b1..3cad94bc1 100644 --- a/doc/whats_new/v0.7.rst +++ b/doc/whats_new/v0.7.rst @@ -13,7 +13,7 @@ Version 0.7.2 - |Feature| update_next() and get_results() added to Optimize and add more examples :pr:`837` by :user:`Holger Nahrstaedt ` and - `Sigurd Carlsen ` + :user:`Sigurd Carlsen ` - |Fix| Fix random forest regressor (Add missing min_impurity_decrease) :pr:`829` by :user:`Holger Nahrstaedt ` @@ -31,7 +31,7 @@ Version 0.7.2 ------------------ - |Enhancement| Add more parameter to plot_objective and more plot examples :pr:`830` by :user:`Holger Nahrstaedt ` and - `Sigurd Carlsen ` + :user:`Sigurd Carlsen ` :mod:`skopt.searchcv` --------------------- @@ -45,7 +45,7 @@ Version 0.7.2 Miscellaneous ------------- -* |Api| Fix doc examples +* |Fix| Fix doc examples * |Fix| Fix license detection in github :pr:`827` by :user:`Holger Nahrstaedt ` * |Enhancement| Add doctest to CI @@ -104,9 +104,9 @@ Version 0.7 Miscellaneous ------------- -- Reduce the amount of deprecation warnings in unit tests +- |Fix| Reduce the amount of deprecation warnings in unit tests :pr:`808` by :user:`Holger Nahrstaedt ` -- Reduce the amount of deprecation warnings in unit tests +- |Fix| Reduce the amount of deprecation warnings in unit tests :pr:`802` by :user:`Alex Liebscher ` - joblib instead of sklearn.externals.joblib :pr:`776` by :user:`Vince Jankovics ` diff --git a/doc/whats_new/v0.8.rst b/doc/whats_new/v0.8.rst index b855b0945..f179e8f3c 100644 --- a/doc/whats_new/v0.8.rst +++ b/doc/whats_new/v0.8.rst @@ -5,8 +5,22 @@ Version 0.8.0 ============= - **In Development** +:mod:`skopt.sampler` +-------------------- +- |MajorFeature| Initial sampling generation + from latin hypercube, sobol, hammersly and halton + is possible and can be set in all optimizers + :pr:`835` by :user:`Holger Nahrstaedt ` +- |Enhancement| Improve sampler and add grid sampler + :pr:`851` by :user:`Holger Nahrstaedt ` +Miscellaneous +------------- +- Improve circle ci + :pr:`852` by :user:`Holger Nahrstaedt ` +- Add project toml and adapt minimal numpy, scipy, pyyaml and + joblib version in setup.py + :pr:`850` by :user:`Holger Nahrstaedt ` From fbb34932c6d373a10d49ace59a0fab415ce0c278 Mon Sep 17 00:00:00 2001 From: holgern Date: Sun, 23 Feb 2020 14:26:50 +0100 Subject: [PATCH 120/265] Fix missing include --- doc/whats_new/v0.8.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/whats_new/v0.8.rst b/doc/whats_new/v0.8.rst index f179e8f3c..548fdc612 100644 --- a/doc/whats_new/v0.8.rst +++ b/doc/whats_new/v0.8.rst @@ -1,3 +1,4 @@ +.. include:: _contributors.rst .. currentmodule:: skopt From 49f3dce6b88aeddeb1df80275cc26394087a1810 Mon Sep 17 00:00:00 2001 From: holgern Date: Sun, 23 Feb 2020 14:42:38 +0100 Subject: [PATCH 121/265] Fix link to whats_new --- doc/conf.py | 2 +- doc/templates/index.html | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index a1945bfc5..0c69085df 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -34,7 +34,7 @@ # -- Project information ----------------------------------------------------- project = 'scikit-optimize' -copyright = '2017 - 2020, The scikit-optimize contributors.' 
+copyright = '2017 - 2020, scikit-optimize contributors (BSD License)' author = 'The scikit-optimize contributors' # The short X.Y version diff --git a/doc/templates/index.html b/doc/templates/index.html index 62a0dbee8..98a369032 100644 --- a/doc/templates/index.html +++ b/doc/templates/index.html @@ -110,7 +110,7 @@

    News

  • Feb 2020. scikit-optimize 0.7.2 (Changelog).
  • Feb 2020. scikit-optimize 0.7.1 (Changelog).
-  • Jan 2020. scikit-optimize 0.7 (Changelog).
+  • Jan 2020. scikit-optimize 0.7 (Changelog).
  • April 2018. scikit-optimize 0.6 (Changelog).
  • Mar 2018. scikit-optimize 0.5 (Changelog).
  • Aug 2017. scikit-optimize 0.4 (Changelog). From 9090093e937e221941032d4e2d0d2c88ec58c2d6 Mon Sep 17 00:00:00 2001 From: holgern Date: Sun, 23 Feb 2020 16:37:56 +0100 Subject: [PATCH 122/265] Fix MANIFEST.in --- MANIFEST.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MANIFEST.in b/MANIFEST.in index 77406979c..f0036cd4a 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -2,6 +2,6 @@ include *.md include *.rst recursive-include doc * recursive-include examples * -include COPYING +include LICENSE include README.rst include pyproject.toml \ No newline at end of file From 0b3a54b60a35659105b99e27640d30037dac1bed Mon Sep 17 00:00:00 2001 From: Holger Nahrstaedt Date: Sun, 23 Feb 2020 16:42:15 +0100 Subject: [PATCH 123/265] Replace dev by stable in links to examples --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 46a3a5983..f0d56539f 100644 --- a/README.rst +++ b/README.rst @@ -30,7 +30,7 @@ Important links - Static documentation - `Static documentation `__ - Example notebooks - can be found in the - `here `_. + `here `_. - Issue tracker - https://github.com/scikit-optimize/scikit-optimize/issues - Releases - https://pypi.python.org/pypi/scikit-optimize @@ -99,7 +99,7 @@ class: Read our `introduction to bayesian optimization `__ and the other -`examples `__. +`examples `__. Development From af526df2e92330bf1d30752245ccb34084ded5a3 Mon Sep 17 00:00:00 2001 From: holgern Date: Sun, 23 Feb 2020 19:44:53 +0100 Subject: [PATCH 124/265] Add twine check to assure that package can be uploaded to pypi --- .travis.yml | 6 ++++++ build_tools/travis/install.sh | 7 ++++++- build_tools/travis/test_script.sh | 12 +++++++++++- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 6c76b4ceb..4253356a3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -48,6 +48,12 @@ matrix: env: DISTRIB="conda" PYTHON_VERSION="3.8.1" COVERAGE="false" NUMPY_VERSION="1.18.1" SCIPY_VERSION="1.4.1" PYAML_VERSION="19.12.0" SCIKIT_LEARN_VERSION="0.22.1" JOBLIB_VERSION="0.14.1" + - name: "Python 3.7 - sdist check" + python: "3.7" + env: DISTRIB="conda" PYTHON_VERSION="3.7" + NUMPY_VERSION="*" SCIPY_VERSION="*" PYAML_VERSION="*" + SCIKIT_LEARN_VERSION="*" MATPLOTLIB_VERSION="*" COVERAGE="false" + JOBLIB_VERSION="*" SDIST="true" install: source build_tools/travis/install.sh diff --git a/build_tools/travis/install.sh b/build_tools/travis/install.sh index 04cd1bf92..ebdb8aee1 100644 --- a/build_tools/travis/install.sh +++ b/build_tools/travis/install.sh @@ -64,7 +64,12 @@ if [[ "$COVERAGE" == "true" ]]; then pip install pytest-cov coverage coveralls fi -pip install -e '.[plots]' +if [[ "$SDIST" == "true" ]]; then + pip install -e '.[plots]' +else + python setup.py sdist + pip install twine +fi python --version python -c "import numpy; print('numpy %s' % numpy.__version__)" python -c "import scipy; print('scipy %s' % scipy.__version__)" diff --git a/build_tools/travis/test_script.sh b/build_tools/travis/test_script.sh index c38e5fa0c..154b220f2 100644 --- a/build_tools/travis/test_script.sh +++ b/build_tools/travis/test_script.sh @@ -54,4 +54,14 @@ run_tests() { $TEST_CMD skopt } -run_tests \ No newline at end of file +run_package_check() { + + TEST_CMD="twine check dist/*" + $TEST_CMD +} + +if [[ "$SDIST" == "true" ]]; then + run_package_check +else + run_tests +fi \ No newline at end of file From c3773764484adec660a4cab385af62a8d3ce1cb2 Mon Sep 17 00:00:00 2001 From: holgern Date: Sun, 23 Feb 2020 19:48:23 +0100 
Subject: [PATCH 125/265] Fix typo --- build_tools/travis/install.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build_tools/travis/install.sh b/build_tools/travis/install.sh index ebdb8aee1..f6237c0a1 100644 --- a/build_tools/travis/install.sh +++ b/build_tools/travis/install.sh @@ -65,10 +65,10 @@ if [[ "$COVERAGE" == "true" ]]; then fi if [[ "$SDIST" == "true" ]]; then - pip install -e '.[plots]' -else python setup.py sdist pip install twine +else + pip install -e '.[plots]' fi python --version python -c "import numpy; print('numpy %s' % numpy.__version__)" From c2798aad832aee006484dc48b002f7452a41e241 Mon Sep 17 00:00:00 2001 From: holgern Date: Sun, 23 Feb 2020 19:52:11 +0100 Subject: [PATCH 126/265] Remove second python 3.7 travis test --- .travis.yml | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/.travis.yml b/.travis.yml index 4253356a3..8521111e3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -34,16 +34,10 @@ matrix: - name: "Python 3.7 - scikit 0.21.3" python: "3.7" env: DISTRIB="conda" PYTHON_VERSION="3.7" - NUMPY_VERSION="1.15.0" SCIPY_VERSION="1.0.0" PYAML_VERSION="17.8.0" + NUMPY_VERSION="1.16.0" SCIPY_VERSION="1.0.0" PYAML_VERSION="17.8.0" SCIKIT_LEARN_VERSION="0.21.3" MATPLOTLIB_VERSION="*" COVERAGE="true" - JOBLIB_VERSION="0.12" - - name: "Python 3.7 - scikit 0.22.1" - python: "3.7" - env: DISTRIB="conda" PYTHON_VERSION="3.7" - NUMPY_VERSION="1.16.0" SCIPY_VERSION="1.2.0" PYAML_VERSION="18.11.0" - SCIKIT_LEARN_VERSION="0.22.1" MATPLOTLIB_VERSION="*" COVERAGE="true" JOBLIB_VERSION="0.13" - - name: "Python 3.8 latest package versions" + - name: "Python 3.8 - scikit 0.22.1" python: "3.7" env: DISTRIB="conda" PYTHON_VERSION="3.8.1" COVERAGE="false" NUMPY_VERSION="1.18.1" SCIPY_VERSION="1.4.1" PYAML_VERSION="19.12.0" From 801d85643d8aa31264745fd237bf99236c98ec95 Mon Sep 17 00:00:00 2001 From: Holger Nahrstaedt Date: Sun, 23 Feb 2020 20:52:13 +0100 Subject: [PATCH 127/265] Fix link to examples --- README.rst | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index 2fac87ca4..ab80c31da 100644 --- a/README.rst +++ b/README.rst @@ -29,8 +29,7 @@ Important links - Static documentation - `Static documentation `__ -- Example notebooks - can be found in the - `here `_. +- Example notebooks - can be found in examples_. - Issue tracker - https://github.com/scikit-optimize/scikit-optimize/issues - Releases - https://pypi.python.org/pypi/scikit-optimize @@ -98,8 +97,7 @@ class: Read our `introduction to bayesian optimization `__ -and the other -`examples `__. +and the other examples_. Development @@ -198,3 +196,4 @@ recognition, feel free to add them to the "Made possible by" list. :target: https://gitter.im/scikit-optimize/Lobby .. |Zenodo DOI| image:: https://zenodo.org/badge/54340642.svg :target: https://zenodo.org/badge/latestdoi/54340642 +.. 
_examples: https://scikit-optimize.github.io/stable/auto_examples/index.html From f20936914c7ea17435b0c83d3d22d127a48da15c Mon Sep 17 00:00:00 2001 From: holgern Date: Sun, 23 Feb 2020 20:54:06 +0100 Subject: [PATCH 128/265] Show command --- build_tools/travis/test_script.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/build_tools/travis/test_script.sh b/build_tools/travis/test_script.sh index 154b220f2..42eb17249 100644 --- a/build_tools/travis/test_script.sh +++ b/build_tools/travis/test_script.sh @@ -57,6 +57,7 @@ run_tests() { run_package_check() { TEST_CMD="twine check dist/*" + set -x $TEST_CMD } From 4f119504b60e257098e86f1efed865d80e6b8024 Mon Sep 17 00:00:00 2001 From: holgern Date: Mon, 24 Feb 2020 11:44:29 +0100 Subject: [PATCH 129/265] Fill user guide with some content --- doc/modules/acquisition.rst | 45 ++++++++++++++++++++++++++++++ doc/modules/bayessearchcv.rst | 16 +++++++++++ doc/modules/minimize_functions.rst | 43 +++++++++++++++++++++++++--- doc/modules/space.rst | 16 +++++++++-- doc/modules/utils.rst | 28 +++++++++++++++++++ 5 files changed, 142 insertions(+), 6 deletions(-) diff --git a/doc/modules/acquisition.rst b/doc/modules/acquisition.rst index 3ecf2e116..5332833b8 100644 --- a/doc/modules/acquisition.rst +++ b/doc/modules/acquisition.rst @@ -4,3 +4,48 @@ Acquisition ===========
+Function to minimize over the posterior distribution.
+
+:class:`gaussian_lcb`
+---------------------
+Use the lower confidence bound to estimate the acquisition
+values.
+
+The trade-off between exploitation and exploration is left to
+be controlled by the user through the parameter ``kappa``.
+
+:class:`gaussian_pi`
+--------------------
+Use the probability of improvement to calculate the acquisition values.
+
+The conditional probability `P(y=f(x) | x)` forms a Gaussian with a
+certain mean and standard deviation approximated by the model.
+
+The PI condition is derived by computing ``E[u(f(x))]``
+where ``u(f(x)) = 1`` if ``f(x) < y_opt`` and ``u(f(x)) = 0``
+if ``f(x) > y_opt``.
+
+This means that the PI condition does not care about how much better the
+predictions are than the previous values, since it gives an equal reward
+to all of them.
+
+Note that the value returned by this function should be maximized to
+obtain the ``X`` with maximum improvement.
+
+
+:class:`gaussian_ei`
+--------------------
+Use the expected improvement to calculate the acquisition values.
+
+The conditional probability `P(y=f(x) | x)` forms a Gaussian with a certain
+mean and standard deviation approximated by the model.
+
+The EI condition is derived by computing ``E[u(f(x))]``
+where ``u(f(x)) = 0`` if ``f(x) > y_opt`` and ``u(f(x)) = y_opt - f(x)``
+if ``f(x) < y_opt``.
+
+This solves one of the issues of the PI condition by giving a reward
+proportional to the amount of improvement obtained.
+
+Note that the value returned by this function should be maximized to
+obtain the ``X`` with maximum improvement. diff --git a/doc/modules/bayessearchcv.rst b/doc/modules/bayessearchcv.rst index 1a2b1f383..dfa402c12 100644 --- a/doc/modules/bayessearchcv.rst +++ b/doc/modules/bayessearchcv.rst @@ -1,3 +1,5 @@ +.. currentmodule:: skopt + .. _bayessearchcv: BayesSearchCV, a GridSearchCV compatible estimator ================================================== Use ``BayesSearchCV`` as a replacement for scikit-learn's GridSearchCV. +BayesSearchCV implements a "fit" and a "score" method.
+It also implements "predict", "predict_proba", "decision_function",
+"transform" and "inverse_transform" if they are implemented in the
+estimator used.
+
+The parameters of the estimator used to apply these methods are optimized
+by cross-validated search over parameter settings.
+
+In contrast to GridSearchCV, not all parameter values are tried out, but
+rather a fixed number of parameter settings is sampled from the specified
+distributions. The number of parameter settings that are tried is
+given by ``n_iter``.
+
+Parameters are presented as a list of :class:`skopt.space.Dimension` objects. \ No newline at end of file diff --git a/doc/modules/minimize_functions.rst b/doc/modules/minimize_functions.rst index 0e738a115..1b05cd551 100644 --- a/doc/modules/minimize_functions.rst +++ b/doc/modules/minimize_functions.rst @@ -8,7 +8,42 @@ These are easy to get started with. They mirror the ``scipy.optimize`` API and provide a high level interface to various pre-configured optimizers.
-* :class:`dummy_minimize`
-* :class:`forest_minimize`
-* :class:`gbrt_minimize`
-* :class:`gp_minimize`
+:class:`dummy_minimize`
+-----------------------
+Random search by uniform sampling within the given bounds.
+
+:class:`forest_minimize`
+------------------------
+Sequential optimization using decision trees.
+
+A tree-based regression model is used to model the expensive-to-evaluate
+function `func`. The model is improved by sequentially evaluating
+the expensive function at the next best point, thereby finding the
+minimum of `func` with as few evaluations as possible.
+
+:class:`gbrt_minimize`
+----------------------
+Sequential optimization using gradient boosted trees.
+
+Gradient boosted regression trees are used to model the (very)
+expensive-to-evaluate function `func`. The model is improved
+by sequentially evaluating the expensive function at the next
+best point, thereby finding the minimum of `func` with as
+few evaluations as possible.
+
+:class:`gp_minimize`
+--------------------
+Bayesian optimization using Gaussian Processes.
+
+If every function evaluation is expensive, for instance
+when the parameters are the hyperparameters of a neural network
+and the function evaluation is the mean cross-validation score across
+ten folds, optimizing the hyperparameters by standard optimization
+routines would take forever!
+
+The idea is to approximate the function using a Gaussian process.
+In other words, the function values are assumed to follow a multivariate
+Gaussian. The covariance of the function values is given by a
+GP kernel between the parameters. A smart choice for the next
+parameter to evaluate can then be made by the acquisition function
+over the Gaussian prior, which is much quicker to evaluate. diff --git a/doc/modules/space.rst b/doc/modules/space.rst index 4525732d6..9eac0857c 100644 --- a/doc/modules/space.rst +++ b/doc/modules/space.rst @@ -2,6 +2,18 @@ .. _space: -Space define the optimization space -=================================== +Space +=====
+:class:`Space` defines the optimization space, which contains one or multiple dimensions of the following types:
+:class:`Real`
+-------------
+Search space dimension that can take on any real value.
+
+:class:`Integer`
+----------------
+Search space dimension that can take on integer values.
+
+:class:`Categorical`
+--------------------
+Search space dimension that can take on categorical values.
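For orientation, the dimension types above combine into a search space that can be passed straight to any of the minimize functions; a minimal sketch, assuming an illustrative toy objective (names, bounds and call budget are made up for the example)::

    >>> from skopt import forest_minimize
    >>> from skopt.space import Real, Integer, Categorical
    >>> # One Dimension object per parameter of the objective.
    >>> space = [Real(1e-5, 1e0, prior="log-uniform", name="lr"),
    ...          Integer(1, 5, name="depth"),
    ...          Categorical(["relu", "tanh"], name="activation")]
    >>> def toy_objective(params):
    ...     lr, depth, activation = params
    ...     # Cheap stand-in for an expensive model evaluation.
    ...     return lr * depth + (0.5 if activation == "tanh" else 0.0)
    >>> res = forest_minimize(toy_objective, space, n_calls=12, random_state=0)
    >>> len(res.x_iters)
    12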
\ No newline at end of file diff --git a/doc/modules/utils.rst b/doc/modules/utils.rst index a7e7fa0dd..62134ca88 100644 --- a/doc/modules/utils.rst +++ b/doc/modules/utils.rst @@ -7,3 +7,31 @@ Utility functions This is a list of public utility functions. Other functions in this module are meant for internal use. +:func:`use_named_args` +---------------------- +This utility function allows it to use objective functions with named arguments:: + + >>> # Define the search-space dimensions. They must all have names! + >>> from skopt.space import Real + >>> from skopt.utils import use_named_args + >>> dim1 = Real(name='foo', low=0.0, high=1.0) + >>> dim2 = Real(name='bar', low=0.0, high=1.0) + >>> dim3 = Real(name='baz', low=0.0, high=1.0) + >>> + >>> # Gather the search-space dimensions in a list. + >>> dimensions = [dim1, dim2, dim3] + >>> + >>> # Define the objective function with named arguments + >>> # and use this function-decorator to specify the + >>> # search-space dimensions. + >>> @use_named_args(dimensions=dimensions) + ... def my_objective_function(foo, bar, baz): + ... return foo ** 2 + bar ** 4 + baz ** 8 + +:func:`dump` +------------ +Store an skopt optimization result into a file. + +:func:`load` +------------ +Reconstruct a skopt optimization result from a file persisted with :func:`dump`. From 62bd464499632bdf85b2e81268ec94f7f226a80a Mon Sep 17 00:00:00 2001 From: holgern Date: Mon, 24 Feb 2020 11:48:07 +0100 Subject: [PATCH 130/265] Fix pypy circleci --- build_tools/circle/build_test_pypy.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build_tools/circle/build_test_pypy.sh b/build_tools/circle/build_test_pypy.sh index a7a86eb9a..c858d9add 100755 --- a/build_tools/circle/build_test_pypy.sh +++ b/build_tools/circle/build_test_pypy.sh @@ -27,7 +27,7 @@ pip install --extra-index-url https://antocuni.github.io/pypy-wheels/manylinux20 # Install Cython directly pip install https://antocuni.github.io/pypy-wheels/ubuntu/Cython/Cython-0.29.14-py3-none-any.whl -pip install sphinx numpydoc docutils joblib pillow pytest +pip install sphinx numpydoc docutils joblib pillow pytest matplotlib ccache -M 512M export CCACHE_COMPRESS=1 From 95627d7a6b4558e6d408b46292a9a895e60cbe1b Mon Sep 17 00:00:00 2001 From: holgern Date: Mon, 24 Feb 2020 11:50:30 +0100 Subject: [PATCH 131/265] Activate test_doc --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 8521111e3..bd7dded86 100644 --- a/.travis.yml +++ b/.travis.yml @@ -53,7 +53,7 @@ matrix: install: source build_tools/travis/install.sh script: - bash build_tools/travis/test_script.sh - # - bash build_tools/travis/test_docs.sh + - bash build_tools/travis/test_docs.sh after_success: source build_tools/travis/after_success.sh deploy: provider: pypi From 5d4d1819dfffd69b8bfc136ffa7c78c971be33e2 Mon Sep 17 00:00:00 2001 From: holgern Date: Mon, 24 Feb 2020 12:18:15 +0100 Subject: [PATCH 132/265] Switch to doc for doctest --- build_tools/travis/test_docs.sh | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/build_tools/travis/test_docs.sh b/build_tools/travis/test_docs.sh index d43b480fa..3a01d2458 100644 --- a/build_tools/travis/test_docs.sh +++ b/build_tools/travis/test_docs.sh @@ -2,5 +2,9 @@ set -e set -x - -make test-doc +if [[ "$SDIST" != "true" ]]; then + pushd . 
+ cd doc + make doctest + popd +fi From e5666267eb2d638e7989117317915caaeb5c4381 Mon Sep 17 00:00:00 2001 From: holgern Date: Mon, 24 Feb 2020 12:54:38 +0100 Subject: [PATCH 133/265] Add first code to test in getting_started --- build_tools/travis/test_docs.sh | 5 +---- doc/getting_started.rst | 40 ++++++++++++++++++--------------- 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/build_tools/travis/test_docs.sh b/build_tools/travis/test_docs.sh index 3a01d2458..3df03926f 100644 --- a/build_tools/travis/test_docs.sh +++ b/build_tools/travis/test_docs.sh @@ -3,8 +3,5 @@ set -e set -x if [[ "$SDIST" != "true" ]]; then - pushd . - cd doc - make doctest - popd + make test-doc fi diff --git a/doc/getting_started.rst b/doc/getting_started.rst index 39379ef45..cd077c689 100644 --- a/doc/getting_started.rst +++ b/doc/getting_started.rst @@ -1,8 +1,10 @@ -.. currentmodule:: skopt =============== Getting started =============== + +.. currentmodule:: skopt + Scikit-Optimize, or ``skopt``, is a simple and efficient library to minimize (very) expensive and noisy black-box functions. It implements several methods for sequential model-based optimization. ``skopt`` aims @@ -27,27 +29,29 @@ Finding a minimum Find the minimum of the noisy function ``f(x)`` over the range ``-2 < x < 2`` with :class:`skopt`::

    - import numpy as np
    - from skopt import gp_minimize
    -
    - def f(x):
    -     return (np.sin(5 * x[0]) * (1 - np.tanh(x[0] ** 2)) *
    -             np.random.randn() * 0.1)
    -
    - res = gp_minimize(f, [(-2.0, 2.0)])
    + >>> import numpy as np
    + >>> from skopt import gp_minimize
    + >>> np.random.seed(123)
    + >>> def f(x):
    + ...     return (np.sin(5 * x[0]) * (1 - np.tanh(x[0] ** 2)) *
    + ...             np.random.randn() * 0.1)
    + >>>
    + >>> res = gp_minimize(f, [(-2.0, 2.0)], n_calls=20)
    + >>> print("x*=%.2f f(x*)=%.2f" % (res.x[0], res.fun))
    + x*=0.85 f(x*)=-0.05

For more control over the optimization loop you can use the :class:`skopt.Optimizer` class::

    - from skopt import Optimizer
    -
    - opt = Optimizer([(-2.0, 2.0)])
    -
    - for i in range(20):
    -     suggested = opt.ask()
    -     y = f(suggested)
    -     opt.tell(suggested, y)
    -     print('iteration:', i, suggested, y)
    + >>> from skopt import Optimizer
    + >>> opt = Optimizer([(-2.0, 2.0)])
    + >>>
    + >>> for i in range(20):
    + ...     suggested = opt.ask()
    + ...     y = f(suggested)
    + ...     res = opt.tell(suggested, y)
    + >>> print("x*=%.2f f(x*)=%.2f" % (res.x[0], res.fun))
    + x*=-0.10 f(x*)=-0.06

For more read our :ref:`sphx_glr_auto_examples_bayesian-optimization.py` and the other `examples `_.
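The :func:`dump` and :func:`load` helpers described in the utils guide above persist such results; a small sketch (the file name is arbitrary, and ``store_objective=False`` is the safe choice when the objective cannot be pickled)::

    >>> import numpy as np
    >>> from skopt import gp_minimize, dump, load
    >>> def f(x):
    ...     return np.sin(x[0])
    >>> res = gp_minimize(f, [(-2.0, 2.0)], n_calls=10, random_state=0)
    >>> dump(res, 'result.pkl', store_objective=False)
    >>> loaded = load('result.pkl')
    >>> bool(np.isclose(loaded.fun, res.fun))
    True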
From 51b8f2acd35efd978dafbfd3fdf5887e34b87158 Mon Sep 17 00:00:00 2001 From: holgern Date: Mon, 24 Feb 2020 13:07:37 +0100 Subject: [PATCH 134/265] Use pytest conf in setup.cfg --- pytest.ini | 6 ------ setup.cfg | 8 +++++++- 2 files changed, 7 insertions(+), 7 deletions(-) delete mode 100644 pytest.ini diff --git a/pytest.ini b/pytest.ini deleted file mode 100644 index 590369315..000000000 --- a/pytest.ini +++ /dev/null @@ -1,6 +0,0 @@ -[pytest] -markers = - fast_test: marks tests as fast (deselect with '-m "not fast_test"') - slow_test: marks tests as slow (deselect with '-m "not slow_test"') - slow: marks tests as slow (deselect with '-m "not slow"') - serial \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index 82f184d34..dcc9caad1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -13,11 +13,17 @@ addopts = --ignore maint_tools --doctest-modules --disable-pytest-warnings - -rs + -rxXs filterwarnings = ignore:the matrix subclass:PendingDeprecationWarning +markers = + fast_test: marks tests as fast (deselect with '-m "not fast_test"') + slow_test: marks tests as slow (deselect with '-m "not slow_test"') + slow: marks tests as slow (deselect with '-m "not slow"') + serial + # used by our travis auto-deployment system # needs changing if scikit-optimize ever stops being # a pure python module From 74d02b6400841bec34eeba35c9c024ac758db4ca Mon Sep 17 00:00:00 2001 From: holgern Date: Mon, 24 Feb 2020 13:21:46 +0100 Subject: [PATCH 135/265] Fix doc test --- doc/getting_started.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/getting_started.rst b/doc/getting_started.rst index cd077c689..2421f7eb2 100644 --- a/doc/getting_started.rst +++ b/doc/getting_started.rst @@ -38,7 +38,7 @@ with :class:`skopt`:: >>> >>> res = gp_minimize(f, [(-2.0, 2.0)], n_calls=20) >>> print("x*=%.2f f(x*)=%.2f" % (res.x[0], res.fun)) - x*=0.85 f(x*)=-0.05 + x*=0.85 f(x*)=-0.06 For more control over the optimization loop you can use the :class:`skopt.Optimizer` class:: From bdc21a6db0fafef66178614969e1d25e9a7f540c Mon Sep 17 00:00:00 2001 From: holgern Date: Mon, 24 Feb 2020 13:42:36 +0100 Subject: [PATCH 136/265] Fix second test --- doc/getting_started.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/getting_started.rst b/doc/getting_started.rst index 2421f7eb2..4836c0758 100644 --- a/doc/getting_started.rst +++ b/doc/getting_started.rst @@ -51,7 +51,7 @@ class:: ... y = f(suggested) ... res = opt.tell(suggested, y) >>> print("x*=%.2f f(x*)=%.2f" % (res.x[0], res.fun)) - x*=-0.10 f(x*)=-0.06 + x*=0.27 f(x*)=-0.15 For more read our :ref:`sphx_glr_auto_examples_bayesian-optimization.py` and the other `examples `_. 
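To illustrate how the markers moved into ``setup.cfg`` above are consumed, a hypothetical test (not part of the suite) would opt in with the standard ``pytest.mark`` decorator and then be deselected by ``pytest -m "not slow"``::

    import pytest

    @pytest.mark.slow  # marker registered in setup.cfg above
    def test_exhaustive_grid():
        # Hypothetical placeholder for a long-running check.
        assert 1 + 1 == 2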
From 96a5628e0d44a846f16e0c4bd938dff6553e41de Mon Sep 17 00:00:00 2001 From: holgern Date: Mon, 24 Feb 2020 14:22:17 +0100 Subject: [PATCH 137/265] Fix issue #624 cmap of branin plot has the same colormap as the objective plot --- examples/plots/visualizing-results.py | 3 ++- examples/strategy-comparison.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/plots/visualizing-results.py b/examples/plots/visualizing-results.py index 88cb0e0f4..cb5a0cc54 100644 --- a/examples/plots/visualizing-results.py +++ b/examples/plots/visualizing-results.py @@ -81,7 +81,8 @@ def plot_branin(): cm = ax.pcolormesh(x_ax, y_ax, fx, norm=LogNorm(vmin=fx.min(), - vmax=fx.max())) + vmax=fx.max()), + cmap='viridis_r') minima = np.array([[-np.pi, 12.275], [+np.pi, 2.275], [9.42478, 2.475]]) ax.plot(minima[:, 0], minima[:, 1], "r.", markersize=14, diff --git a/examples/strategy-comparison.py b/examples/strategy-comparison.py index 33c659223..d59d4bb7b 100644 --- a/examples/strategy-comparison.py +++ b/examples/strategy-comparison.py @@ -55,7 +55,8 @@ def plot_branin(): cm = ax.pcolormesh(x_ax, y_ax, fx, norm=LogNorm(vmin=fx.min(), - vmax=fx.max())) + vmax=fx.max()), + cmap='viridis_r') minima = np.array([[-np.pi, 12.275], [+np.pi, 2.275], [9.42478, 2.475]]) ax.plot(minima[:, 0], minima[:, 1], "r.", markersize=14, From 7e3550ecfd69bd712b720cf7f89f289d736d1a73 Mon Sep 17 00:00:00 2001 From: holgern Date: Mon, 24 Feb 2020 15:47:41 +0100 Subject: [PATCH 138/265] Fix full categorical spaces on Gaussian process based regressors * This fixes issue #589 * Normalize works also when low==high * normalize_dimensions uses normalize transform also for categorical --- doc/whats_new/v0.8.rst | 4 +++ skopt/space/transformers.py | 2 ++ skopt/tests/test_optimizer.py | 46 ++++++++++++++++++++++++++++++++ skopt/utils.py | 49 +++++++++++++++-------------------- 4 files changed, 73 insertions(+), 28 deletions(-) diff --git a/doc/whats_new/v0.8.rst b/doc/whats_new/v0.8.rst index 548fdc612..917ba64d7 100644 --- a/doc/whats_new/v0.8.rst +++ b/doc/whats_new/v0.8.rst @@ -17,6 +17,10 @@ Version 0.8.0 - |Enhancement| Improve sampler and add grid sampler :pr:`851` by :user:`Holger Nahrstaedt ` +:mod:`skopt.utils` +------------------ +- |Fix| Fix Optimizer for full categorical spaces + Miscellaneous ------------- - Improve circle ci diff --git a/skopt/space/transformers.py b/skopt/space/transformers.py index b616ac584..dde37f52f 100644 --- a/skopt/space/transformers.py +++ b/skopt/space/transformers.py @@ -255,6 +255,8 @@ def transform(self, X): if np.any(X < self.low - 1e-8): raise ValueError("All values should" "be greater than %f" % self.low) + if (self.high - self.low) == 0.: + return X * 0. 
if self.is_int: return (np.round(X).astype(np.int) - self.low) /\ (self.high - self.low) diff --git a/skopt/tests/test_optimizer.py b/skopt/tests/test_optimizer.py index fab8766a8..e847ab4fb 100644 --- a/skopt/tests/test_optimizer.py +++ b/skopt/tests/test_optimizer.py @@ -348,3 +348,49 @@ def test_dimensions_names(): assert "cat" in names assert "int" in names assert None not in names + + +@pytest.mark.fast_test +def test_categorical_only(): + from skopt.space import Categorical + cat1 = Categorical([2, 3, 4, 5, 6, 7, 8, 9, 10, 11]) + cat2 = Categorical([2, 3, 4, 5, 6, 7, 8, 9, 10, 11]) + + opt = Optimizer([cat1, cat2]) + for n in range(15): + x = opt.ask() + res = opt.tell(x, 12 * n) + assert len(res.x_iters) == 15 + next_x = opt.ask(n_points=4) + assert len(next_x) == 4 + + cat3 = Categorical(["2", "3", "4", "5", "6", "7", "8", "9", "10", "11"]) + cat4 = Categorical(["2", "3", "4", "5", "6", "7", "8", "9", "10", "11"]) + + opt = Optimizer([cat3, cat4]) + for n in range(15): + x = opt.ask() + res = opt.tell(x, 12 * n) + assert len(res.x_iters) == 15 + next_x = opt.ask(n_points=4) + assert len(next_x) == 4 + + +def test_categorical_only2(): + from numpy import linalg + from skopt.space import Categorical + from skopt.learning import GaussianProcessRegressor + space = [Categorical([1, 2, 3]), Categorical(["4", "5", "6"])] + opt = Optimizer(space, + base_estimator=GaussianProcessRegressor(alpha=1e-7), + acq_optimizer='lbfgs', + n_initial_points=10) + + next_x = opt.ask(n_points=4) + assert len(next_x) == 4 + res = opt.tell(next_x, [linalg.norm(x) for x in next_x]) + next_x = opt.ask(n_points=4) + assert len(next_x) == 4 + res = opt.tell(next_x, [linalg.norm(int(x)) for x in next_x]) + next_x = opt.ask(n_points=4) + assert len(next_x) == 4 diff --git a/skopt/utils.py b/skopt/utils.py index 6c769d731..2293a0711 100644 --- a/skopt/utils.py +++ b/skopt/utils.py @@ -594,37 +594,30 @@ def normalize_dimensions(dimensions): """ space = Space(dimensions) transformed_dimensions = [] - if space.is_categorical: - # recreate the space and explicitly set transform to "string" - # this is a special case for GP based regressors - for dimension in space: + for dimension in space.dimensions: + if isinstance(dimension, Categorical): transformed_dimensions.append(Categorical(dimension.categories, dimension.prior, name=dimension.name, - transform="string")) - - else: - for dimension in space.dimensions: - if isinstance(dimension, Categorical): - transformed_dimensions.append(dimension) - # To make sure that GP operates in the [0, 1] space - elif isinstance(dimension, Real): - transformed_dimensions.append( - Real(dimension.low, dimension.high, dimension.prior, - name=dimension.name, - transform="normalize", - dtype=dimension.dtype) - ) - elif isinstance(dimension, Integer): - transformed_dimensions.append( - Integer(dimension.low, dimension.high, - name=dimension.name, - transform="normalize", - dtype=dimension.dtype) - ) - else: - raise RuntimeError("Unknown dimension type " - "(%s)" % type(dimension)) + transform="normalize")) + # To make sure that GP operates in the [0, 1] space + elif isinstance(dimension, Real): + transformed_dimensions.append( + Real(dimension.low, dimension.high, dimension.prior, + name=dimension.name, + transform="normalize", + dtype=dimension.dtype) + ) + elif isinstance(dimension, Integer): + transformed_dimensions.append( + Integer(dimension.low, dimension.high, + name=dimension.name, + transform="normalize", + dtype=dimension.dtype) + ) + else: + raise RuntimeError("Unknown 
dimension type " + "(%s)" % type(dimension)) return Space(transformed_dimensions) From dbaa4e57e3708788cf7fae2dfb3cd8dd1a862ff2 Mon Sep 17 00:00:00 2001 From: holgern Date: Mon, 24 Feb 2020 15:48:51 +0100 Subject: [PATCH 139/265] Add PR number to whats_new --- doc/whats_new/v0.8.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/whats_new/v0.8.rst b/doc/whats_new/v0.8.rst index 917ba64d7..7d791b466 100644 --- a/doc/whats_new/v0.8.rst +++ b/doc/whats_new/v0.8.rst @@ -20,6 +20,7 @@ Version 0.8.0 :mod:`skopt.utils` ------------------ - |Fix| Fix Optimizer for full categorical spaces + :pr:`874` by :user:`Holger Nahrstaedt ` Miscellaneous ------------- From e3ca8396c1ce363cde0970ad27a4cc33068573e8 Mon Sep 17 00:00:00 2001 From: holgern Date: Mon, 24 Feb 2020 15:49:46 +0100 Subject: [PATCH 140/265] Fix linting --- skopt/tests/test_optimizer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skopt/tests/test_optimizer.py b/skopt/tests/test_optimizer.py index e847ab4fb..b0deb7e60 100644 --- a/skopt/tests/test_optimizer.py +++ b/skopt/tests/test_optimizer.py @@ -388,9 +388,9 @@ def test_categorical_only2(): next_x = opt.ask(n_points=4) assert len(next_x) == 4 - res = opt.tell(next_x, [linalg.norm(x) for x in next_x]) + opt.tell(next_x, [linalg.norm(x) for x in next_x]) next_x = opt.ask(n_points=4) assert len(next_x) == 4 - res = opt.tell(next_x, [linalg.norm(int(x)) for x in next_x]) + opt.tell(next_x, [linalg.norm(int(x)) for x in next_x]) next_x = opt.ask(n_points=4) assert len(next_x) == 4 From 20e7c6e03ae05f428cfc4d8dc51a2db498e8d6e9 Mon Sep 17 00:00:00 2001 From: holgern Date: Mon, 24 Feb 2020 16:16:39 +0100 Subject: [PATCH 141/265] Fix docstring warnings --- skopt/utils.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/skopt/utils.py b/skopt/utils.py index 2293a0711..7d2a5f925 100644 --- a/skopt/utils.py +++ b/skopt/utils.py @@ -202,9 +202,8 @@ def check_x_in_space(x, space): def expected_minimum(res, n_random_starts=20, random_state=None): - """ - Compute the minimum over the predictions of the last surrogate model. - Uses `expected_minimum_random_sampling` with `n_random_starts`=100000, + """Compute the minimum over the predictions of the last surrogate model. + Uses `expected_minimum_random_sampling` with `n_random_starts` = 100000, when the space contains any categorical values. .. note:: @@ -333,8 +332,7 @@ def has_gradients(estimator): def cook_estimator(base_estimator, space=None, **kwargs): - """ - Cook a default estimator. + """Cook a default estimator. For the special base_estimator called "DUMMY" the return value is None. This corresponds to sampling points at random, hence there is no need @@ -342,8 +340,7 @@ def cook_estimator(base_estimator, space=None, **kwargs): Parameters ---------- - base_estimator : "GP", "RF", "ET", "GBRT", "DUMMY" - or sklearn regressor + base_estimator : "GP", "RF", "ET", "GBRT", "DUMMY" or sklearn regressor Should inherit from `sklearn.base.RegressorMixin`. In addition the `predict` method should have an optional `return_std` argument, which returns `std(Y | x)`` along with `E[Y | x]`. @@ -408,15 +405,14 @@ def cook_estimator(base_estimator, space=None, **kwargs): def cook_initial_point_generator(generator, **kwargs): - """ - Cook a default initial point generator. + """Cook a default initial point generator. For the special generator called "random" the return value is None. 
Parameters ---------- - generator : "lhs", "sobol", "halton", "hammersly", "grid", "random" - or InitialPointGenerator instance" + generator : "lhs", "sobol", "halton", "hammersly", "grid", "random" \ + or InitialPointGenerator instance" Should inherit from `skopt.sampler.InitialPointGenerator`. kwargs : dict From 4e87ab259f65c48dc4748009462791cba485abd8 Mon Sep 17 00:00:00 2001 From: holgern Date: Mon, 24 Feb 2020 16:18:50 +0100 Subject: [PATCH 142/265] Fix unit tests --- skopt/tests/test_optimizer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skopt/tests/test_optimizer.py b/skopt/tests/test_optimizer.py index b0deb7e60..a49ddc7bb 100644 --- a/skopt/tests/test_optimizer.py +++ b/skopt/tests/test_optimizer.py @@ -380,7 +380,7 @@ def test_categorical_only2(): from numpy import linalg from skopt.space import Categorical from skopt.learning import GaussianProcessRegressor - space = [Categorical([1, 2, 3]), Categorical(["4", "5", "6"])] + space = [Categorical([1, 2, 3]), Categorical([4, 5, 6])] opt = Optimizer(space, base_estimator=GaussianProcessRegressor(alpha=1e-7), acq_optimizer='lbfgs', @@ -391,6 +391,6 @@ def test_categorical_only2(): opt.tell(next_x, [linalg.norm(x) for x in next_x]) next_x = opt.ask(n_points=4) assert len(next_x) == 4 - opt.tell(next_x, [linalg.norm(int(x)) for x in next_x]) + opt.tell(next_x, [linalg.norm(x) for x in next_x]) next_x = opt.ask(n_points=4) assert len(next_x) == 4 From 29fa1e1da74e214364bfef7fd3382cffa68ef844 Mon Sep 17 00:00:00 2001 From: holgern Date: Mon, 24 Feb 2020 17:06:16 +0100 Subject: [PATCH 143/265] Use GP as base estimator in the ask and tell example * fix issue #663 --- examples/ask-and-tell.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/examples/ask-and-tell.py b/examples/ask-and-tell.py index 21433b7a0..8d489f216 100644 --- a/examples/ask-and-tell.py +++ b/examples/ask-and-tell.py @@ -72,7 +72,9 @@ def objective(x, noise_level=noise_level): # naming of the ***_minimize()** functions. An important difference is that # you do not pass the objective function to the optimizer. -opt = Optimizer([(-2.0, 2.0)], "ET", acq_optimizer="sampling") +opt = Optimizer([(-2.0, 2.0)], "GP", acq_func="EI", + acq_optimizer="sampling", + initial_point_generator="lhs") # To obtain a suggestion for the point at which to evaluate the objective # you call the ask() method of opt: @@ -90,7 +92,8 @@ def objective(x, noise_level=noise_level): opt.tell(next_x, f_val) ######################################################################### -# Like ***_minimize()** the first few points are random suggestions as there +# Like ***_minimize()** the first few points are suggestions from +# the initial point generator as there # is no data yet with which to fit a surrogate model. 
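Condensed into one runnable sketch, the ask-and-tell loop of this example (same search space, estimator choice and noisy objective as in the file above; the seed is arbitrary) looks like this::

    >>> import numpy as np
    >>> from skopt import Optimizer
    >>> np.random.seed(1234)
    >>> noise_level = 0.1
    >>> def objective(x, noise_level=noise_level):
    ...     return (np.sin(5 * x[0]) * (1 - np.tanh(x[0] ** 2)) +
    ...             np.random.randn() * noise_level)
    >>> opt = Optimizer([(-2.0, 2.0)], "GP", acq_func="EI",
    ...                 acq_optimizer="sampling",
    ...                 initial_point_generator="lhs")
    >>> for _ in range(20):
    ...     next_x = opt.ask()
    ...     f_val = objective(next_x)
    ...     res = opt.tell(next_x, f_val)
    >>> len(res.x_iters)
    20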
From fd32aff743f74272209056fc7fdafd8d30c5d8b2 Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 25 Feb 2020 16:10:44 +0100 Subject: [PATCH 144/265] Fix issue #877 * Use _transpose_list_array helper function in space to reduce code * Use eps value also in inverse_transform in Normalize * Clip inverse_transform also for Integer --- skopt/space/space.py | 58 ++++++++++++++------------------ skopt/space/transformers.py | 9 ++--- skopt/tests/test_space.py | 43 +++++++++++++++++++---- skopt/tests/test_transformers.py | 17 ++++++---- 4 files changed, 78 insertions(+), 49 deletions(-) diff --git a/skopt/space/space.py b/skopt/space/space.py index 1b7f49ea3..2b11fc960 100644 --- a/skopt/space/space.py +++ b/skopt/space/space.py @@ -24,6 +24,22 @@ def __repr__(self): return '...' +def _transpose_list_array(x): + """Transposes a list matrix + """ + + n_dims = len(x) + assert n_dims > 0 + n_samples = len(x[0]) + rows = [None] * n_samples + for i in range(n_samples): + r = [None] * n_dims + for j in range(n_dims): + r[j] = x[j][i] + rows[i] = r + return rows + + def check_dimension(dimension, transform=None): """Turn a provided dimension description into a dimension object. @@ -493,6 +509,8 @@ def inverse_transform(self, Xt): inv_transform = super(Integer, self).inverse_transform(Xt) if isinstance(inv_transform, list): inv_transform = np.array(inv_transform) + inv_transform = np.clip(inv_transform, + self.low, self.high) if self.dtype == int or self.dtype == 'int': # necessary, otherwise the type is converted to a numpy type return getattr(np.round(inv_transform).astype(self.dtype), @@ -512,7 +530,7 @@ def __contains__(self, point): @property def transformed_bounds(self): if self.transform_ == "normalize": - return 0, 1 + return 0., 1. else: return (self.low, self.high) @@ -603,7 +621,7 @@ def set_transformer(self, transform="onehot"): elif transform == "normalize": self.transformer = Pipeline( [LabelEncoder(list(self.categories)), - Normalize(0, len(self.categories) - 1)]) + Normalize(0, len(self.categories) - 1, is_int=True)]) else: self.transformer = Identity() self.transformer.fit(self.categories) @@ -675,7 +693,7 @@ def __contains__(self, point): @property def transformed_bounds(self): if self.transformed_size == 1: - return (0.0, 1.0) + return 0.0, 1.0 else: return [(0.0, 1.0) for i in range(self.transformed_size)] @@ -832,22 +850,10 @@ def rvs(self, n_samples=1, random_state=None): columns = [] for dim in self.dimensions: - if sp_version < (0, 16): - columns.append(dim.rvs(n_samples=n_samples)) - else: - columns.append(dim.rvs(n_samples=n_samples, random_state=rng)) + columns.append(dim.rvs(n_samples=n_samples, random_state=rng)) # Transpose - rows = [] - - for i in range(n_samples): - r = [] - for j in range(self.n_dims): - r.append(columns[j][i]) - - rows.append(r) - - return rows + return _transpose_list_array(columns) def set_transformer(self, transform): """Sets the transformer of all dimension objects to `transform` @@ -884,10 +890,7 @@ def set_transformer_by_type(self, transform, dim_type): def get_transformer(self): """Returns all transformers as list""" - transformer = [] - for j in range(self.n_dims): - transformer.append(self.dimensions[j].transform_) - return transformer + return [self.dimensions[j].transform_ for j in range(self.n_dims)] def transform(self, X): """Transform samples from the original space into a warped space. 
@@ -940,7 +943,7 @@ def inverse_transform(self, Xt): # Inverse transform columns = [] start = 0 - + Xt = np.asarray(Xt) for j in range(self.n_dims): dim = self.dimensions[j] offset = dim.transformed_size @@ -954,16 +957,7 @@ def inverse_transform(self, Xt): start += offset # Transpose - rows = [] - - for i in range(len(Xt)): - r = [] - for j in range(self.n_dims): - r.append(columns[j][i]) - - rows.append(r) - - return rows + return _transpose_list_array(columns) @property def n_dims(self): diff --git a/skopt/space/transformers.py b/skopt/space/transformers.py index dde37f52f..44539aafd 100644 --- a/skopt/space/transformers.py +++ b/skopt/space/transformers.py @@ -238,6 +238,7 @@ def __init__(self, low, high, is_int=False): self.low = float(low) self.high = float(high) self.is_int = is_int + self._eps = 1e-8 def transform(self, X): X = np.asarray(X) @@ -249,10 +250,10 @@ def transform(self, X): raise ValueError("All integer values should" "be greater than %f" % self.low) else: - if np.any(X > self.high + 1e-8): + if np.any(X > self.high + self._eps): raise ValueError("All values should" "be less than %f" % self.high) - if np.any(X < self.low - 1e-8): + if np.any(X < self.low - self._eps): raise ValueError("All values should" "be greater than %f" % self.low) if (self.high - self.low) == 0.: @@ -265,9 +266,9 @@ def transform(self, X): def inverse_transform(self, X): X = np.asarray(X) - if np.any(X > 1.0): + if np.any(X > 1.0 + self._eps): raise ValueError("All values should be less than 1.0") - if np.any(X < 0.0): + if np.any(X < 0.0 - self._eps): raise ValueError("All values should be greater than 0.0") X_orig = X * (self.high - self.low) + self.low if self.is_int: diff --git a/skopt/tests/test_space.py b/skopt/tests/test_space.py index 0b09788d2..a71176fb4 100644 --- a/skopt/tests/test_space.py +++ b/skopt/tests/test_space.py @@ -16,6 +16,7 @@ from skopt.space import Integer from skopt.space import Categorical from skopt.space import check_dimension as space_check_dimension +from skopt.utils import normalize_dimensions def check_dimension(Dimension, vals, random_val): @@ -40,8 +41,8 @@ def check_limits(value, low, high): # check if low <= value <= high if isinstance(value, list): value = np.array(value) - assert low <= value - assert high >= value + assert np.all(low <= value) + assert np.all(high >= value) @pytest.mark.fast_test @@ -398,6 +399,20 @@ def test_normalize(): assert_array_equal(space.inverse_transform(space.transform(X)), X) +@pytest.mark.fast_test +def test_normalize_types(): + # can you pass a Space instance to the Space constructor? 
+ space = Space([(0.0, 1.0), Integer(-5, 5, dtype=int), (True, False)]) + space.set_transformer("normalize") + X = [[0., -5, False]] + Xt = np.zeros((1, 3)) + assert_array_equal(space.transform(X), Xt) + assert_array_equal(space.inverse_transform(Xt), X) + assert_array_equal(space.inverse_transform(space.transform(X)), X) + assert isinstance(space.inverse_transform(Xt)[0][0], float) + assert isinstance(space.inverse_transform(Xt)[0][1], int) + assert isinstance(space.inverse_transform(Xt)[0][2], (np.bool_, bool)) + @pytest.mark.fast_test def test_normalize_real(): @@ -471,7 +486,7 @@ def test_normalize_integer(): for i in range(50): check_limits(a.rvs(random_state=i), 2, 30) assert_array_equal(a.transformed_bounds, (0, 1)) - + rng = np.random.RandomState(0) X = rng.randint(2, 31, dtype=np.int64) # Check transformed values are in [0, 1] assert np.all(a.transform(X) <= np.ones_like(X)) @@ -517,9 +532,9 @@ def test_normalize_categorical(): a = Categorical(categories, transform="normalize") for i in range(len(categories)): assert a.rvs(random_state=i)[0] in categories - assert a.inverse_transform(0.) == categories[0] - assert a.inverse_transform(0.5) == categories[1] - assert a.inverse_transform(1.0) == categories[2] + assert a.inverse_transform([0.]) == [categories[0]] + assert a.inverse_transform([0.5]) == [categories[1]] + assert a.inverse_transform([1.0]) == [categories[2]] assert_array_equal(categories, a.inverse_transform([0., 0.5, 1])) categories = [1, 2, 3] @@ -732,3 +747,19 @@ def test_partly_categorical_space(): assert not dims.is_categorical dims = Space([Integer(1, 2), Integer(1, 2)]) assert not dims.is_partly_categorical + + +@pytest.mark.fast_test +def test_normalize_bounds(): + bounds = [(-999, 189000), Categorical((True, False))] + space = Space(normalize_dimensions(bounds)) + for a in np.linspace(1e-9, 0.4999, 1000): + x = space.inverse_transform([[a, a]]) + check_limits(x[0][0], -999, 189000) + y = space.transform(x) + check_limits(y, 0., 1.) + for a in np.linspace(0.50001, 1e-9 + 1., 1000): + x = space.inverse_transform([[a, a]]) + check_limits(x[0][0], -999, 189000) + y = space.transform(x) + check_limits(y, 0., 1.) diff --git a/skopt/tests/test_transformers.py b/skopt/tests/test_transformers.py index d1d62e929..2e2afb35f 100644 --- a/skopt/tests/test_transformers.py +++ b/skopt/tests/test_transformers.py @@ -97,13 +97,16 @@ def test_normalize_integer(): assert transformer.transform(20.2) == 1.0 assert transformer.transform(1.2) == 0.0 assert transformer.transform(0.9) == 0.0 - assert_raises(ValueError, transformer.transform, 20.6) + assert_raises(ValueError, transformer.transform, 21.6) assert_raises(ValueError, transformer.transform, 0.4) assert transformer.inverse_transform(0.99) == 20 assert transformer.inverse_transform(0.01) == 1 - assert_raises(ValueError, transformer.inverse_transform, 1. + 1e-8) - assert_raises(ValueError, transformer.transform, 0. - 1e-8) + assert_raises(ValueError, transformer.inverse_transform, 1. + 1e-5) + assert_raises(ValueError, transformer.transform, 0. - 1e-5) + transformer = Normalize(0, 20, is_int=True) + assert transformer.transform(-0.2) == 0.0 + assert_raises(ValueError, transformer.transform, -0.6) @pytest.mark.fast_test @@ -111,7 +114,7 @@ def test_normalize(): transformer = Normalize(1, 20, is_int=False) assert transformer.transform(20.) == 1.0 assert transformer.transform(1.) == 0.0 - assert_raises(ValueError, transformer.transform, 20. 
+ 1e-7) - assert_raises(ValueError, transformer.transform, 1.0 - 1e-7) - assert_raises(ValueError, transformer.inverse_transform, 1. + 1e-8) - assert_raises(ValueError, transformer.transform, 0. - 1e-8) + assert_raises(ValueError, transformer.transform, 20. + 1e-5) + assert_raises(ValueError, transformer.transform, 1.0 - 1e-5) + assert_raises(ValueError, transformer.inverse_transform, 1. + 1e-5) + assert_raises(ValueError, transformer.transform, 0. - 1e-5) From ca62d6fef8ecc5b11a28d955942ba267f3ab3f78 Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 25 Feb 2020 16:14:29 +0100 Subject: [PATCH 145/265] Revert changes in unit tests --- skopt/tests/test_transformers.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/skopt/tests/test_transformers.py b/skopt/tests/test_transformers.py index 2e2afb35f..ccb803473 100644 --- a/skopt/tests/test_transformers.py +++ b/skopt/tests/test_transformers.py @@ -97,13 +97,13 @@ def test_normalize_integer(): assert transformer.transform(20.2) == 1.0 assert transformer.transform(1.2) == 0.0 assert transformer.transform(0.9) == 0.0 - assert_raises(ValueError, transformer.transform, 21.6) + assert_raises(ValueError, transformer.transform, 20.6) assert_raises(ValueError, transformer.transform, 0.4) assert transformer.inverse_transform(0.99) == 20 assert transformer.inverse_transform(0.01) == 1 - assert_raises(ValueError, transformer.inverse_transform, 1. + 1e-5) - assert_raises(ValueError, transformer.transform, 0. - 1e-5) + assert_raises(ValueError, transformer.inverse_transform, 1. + 1e-6) + assert_raises(ValueError, transformer.transform, 0. - 1e-6) transformer = Normalize(0, 20, is_int=True) assert transformer.transform(-0.2) == 0.0 assert_raises(ValueError, transformer.transform, -0.6) @@ -114,7 +114,7 @@ def test_normalize(): transformer = Normalize(1, 20, is_int=False) assert transformer.transform(20.) == 1.0 assert transformer.transform(1.) == 0.0 - assert_raises(ValueError, transformer.transform, 20. + 1e-5) - assert_raises(ValueError, transformer.transform, 1.0 - 1e-5) - assert_raises(ValueError, transformer.inverse_transform, 1. + 1e-5) - assert_raises(ValueError, transformer.transform, 0. - 1e-5) + assert_raises(ValueError, transformer.transform, 20. + 1e-6) + assert_raises(ValueError, transformer.transform, 1.0 - 1e-6) + assert_raises(ValueError, transformer.inverse_transform, 1. + 1e-6) + assert_raises(ValueError, transformer.transform, 0. 
- 1e-6) From d36cd958e110ee9ee905d3167ac00969150aced0 Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 25 Feb 2020 16:50:57 +0100 Subject: [PATCH 146/265] Fix doc warnings and add entry to whats_new --- doc/whats_new/v0.8.rst | 7 ++++++- skopt/space/space.py | 39 ++++++++++++++++++++++++--------------- 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/doc/whats_new/v0.8.rst b/doc/whats_new/v0.8.rst index 7d791b466..4c02d5470 100644 --- a/doc/whats_new/v0.8.rst +++ b/doc/whats_new/v0.8.rst @@ -17,9 +17,14 @@ Version 0.8.0 - |Enhancement| Improve sampler and add grid sampler :pr:`851` by :user:`Holger Nahrstaedt ` -:mod:`skopt.utils` +:mod:`skopt.space` ------------------ - |Fix| Fix Optimizer for full categorical spaces + :pr:`880` by :user:`Holger Nahrstaedt + +:mod:`skopt.utils` +------------------ +- |Fix| Fix Integer transform and inverse_transform for normalize :pr:`874` by :user:`Holger Nahrstaedt ` Miscellaneous diff --git a/skopt/space/space.py b/skopt/space/space.py index 2b11fc960..2245fdd81 100644 --- a/skopt/space/space.py +++ b/skopt/space/space.py @@ -218,6 +218,7 @@ class Real(Dimension): prior : "uniform" or "log-uniform", default="uniform" Distribution to use when sampling random points for this dimension. + - If `"uniform"`, points are sampled uniformly between the lower and upper bounds. - If `"log-uniform"`, points are sampled uniformly between @@ -242,6 +243,7 @@ class Real(Dimension): dtype : str or dtype, default=np.float float type which will be used in inverse_transform, can be float. + """ def __init__(self, low, high, prior="uniform", base=10, transform=None, name=None, dtype=np.float): @@ -391,9 +393,10 @@ class Integer(Dimension): prior : "uniform" or "log-uniform", default="uniform" Distribution to use when sampling random integers for this dimension. - - If `"uniform"`, intgers are sampled uniformly between the lower + + - If `"uniform"`, integers are sampled uniformly between the lower and upper bounds. - - If `"log-uniform"`, intgers are sampled uniformly between + - If `"log-uniform"`, integers are sampled uniformly between `log(lower, base)` and `log(upper, base)` where log has base `base`. @@ -417,6 +420,7 @@ class Integer(Dimension): can be int, np.int16, np.uint32, np.int32, np.int64 (default). When set to int, `inverse_transform` returns a list instead of a numpy array + """ def __init__(self, low, high, prior="uniform", base=10, transform=None, name=None, dtype=np.int64): @@ -564,9 +568,10 @@ class Categorical(Dimension): are equally likely. transform : "onehot", "string", "identity", "label", default="onehot" + - "identity", the transformed space is the same as the original space. - - "string", the transformed space is a string encoded + - "string", the transformed space is a string encoded representation of the original space. - "label", the transformed space is a label encoded representation (integer) of the original space. @@ -575,6 +580,7 @@ class Categorical(Dimension): name : str or None Name associated with dimension, e.g., "colors". 
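To make the `prior` and `transform` options documented above concrete, here is a minimal sketch (the bounds, categories, and sample counts are invented for illustration; the constructors and the `transform`/`inverse_transform` round trip follow the docstrings in this diff)::

    from skopt.space import Space, Real, Integer, Categorical

    space = Space([
        Real(1.0, 5.0, prior="log-uniform"),          # sampled uniformly in log space
        Integer(-5, 5),                               # bounds are inclusive
        Categorical(["a", "b"], transform="onehot"),  # "onehot" is the default
    ])
    X = space.rvs(n_samples=2, random_state=0)
    Xt = space.transform(X)                # points mapped into the transformed space
    X_back = space.inverse_transform(Xt)   # mapped back to the original space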
+ """ def __init__(self, categories, prior=None, transform=None, name=None): self.categories = tuple(categories) @@ -770,25 +776,28 @@ def from_yaml(cls, yml_path, namespace=None): yml_path : str Full path to yaml configuration file, example YaML below: Space: - - Integer: - low: -5 - high: 5 - - Categorical: - categories: - - a - - b - - Real: - low: 1.0 - high: 5.0 - prior: log-uniform + + - Integer: + low: -5 + high: 5 + - Categorical: + categories: + - a + - b + - Real: + low: 1.0 + high: 5.0 + prior: log-uniform + namespace : str, default=None Namespace within configuration file to use, will use first - namespace if not provided + namespace if not provided Returns ------- space : Space Instantiated Space object + """ with open(yml_path, 'rb') as f: config = yaml.safe_load(f) From 738c2e4c7ba424fd137d2898cbecaad844f77a55 Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 25 Feb 2020 17:11:39 +0100 Subject: [PATCH 147/265] Fix doc warning --- doc/whats_new/v0.8.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v0.8.rst b/doc/whats_new/v0.8.rst index 4c02d5470..544d59a7c 100644 --- a/doc/whats_new/v0.8.rst +++ b/doc/whats_new/v0.8.rst @@ -20,7 +20,7 @@ Version 0.8.0 :mod:`skopt.space` ------------------ - |Fix| Fix Optimizer for full categorical spaces - :pr:`880` by :user:`Holger Nahrstaedt + :pr:`880` by :user:`Holger Nahrstaedt ` :mod:`skopt.utils` ------------------ From dd2063c8a57e185e2ef5d18d82bb0f8d2b36e7fc Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 25 Feb 2020 17:16:46 +0100 Subject: [PATCH 148/265] Fix doc warnings --- skopt/learning/forest.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/skopt/learning/forest.py b/skopt/learning/forest.py index 67cee9acf..467972b8d 100644 --- a/skopt/learning/forest.py +++ b/skopt/learning/forest.py @@ -28,6 +28,7 @@ def _return_std(X, trees, predictions, min_variance): std : array-like, shape=(n_samples,) Standard deviation of `y` at `X`. If criterion is set to "mse", then `std[i] ~= std(y | X[i])`. + """ # This derives std(y | x) as described in 4.3.2 of arXiv:1211.0906 std = np.zeros(len(X)) @@ -68,6 +69,7 @@ class RandomForestRegressor(_sk_RandomForestRegressor): max_features : int, float, string or None, optional (default="auto") The number of features to consider when looking for the best split: + - If int, then consider `max_features` features at each split. - If float, then `max_features` is a percentage and `int(max_features * n_features)` features are considered at each @@ -76,9 +78,11 @@ class RandomForestRegressor(_sk_RandomForestRegressor): - If "sqrt", then `max_features=sqrt(n_features)`. - If "log2", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. - Note: the search for a split does not stop until at least one - valid partition of the node samples is found, even if it requires to - effectively inspect more than ``max_features`` features. + + .. note:: + The search for a split does not stop until at least one + valid partition of the node samples is found, even if it requires to + effectively inspect more than ``max_features`` features. max_depth : integer or None, optional (default=None) The maximum depth of the tree. 
If None, then nodes are expanded until @@ -87,6 +91,7 @@ class RandomForestRegressor(_sk_RandomForestRegressor): min_samples_split : int, float, optional (default=2) The minimum number of samples required to split an internal node: + - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a percentage and `ceil(min_samples_split * n_samples)` are the minimum @@ -94,6 +99,7 @@ class RandomForestRegressor(_sk_RandomForestRegressor): min_samples_leaf : int, float, optional (default=1) The minimum number of samples required to be at a leaf node: + - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a percentage and `ceil(min_samples_leaf * n_samples)` are the minimum @@ -115,6 +121,7 @@ class RandomForestRegressor(_sk_RandomForestRegressor): The weighted impurity decrease equation is the following:: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) + where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. @@ -183,6 +190,7 @@ class RandomForestRegressor(_sk_RandomForestRegressor): References ---------- .. [1] L. Breiman, "Random Forests", Machine Learning, 45(1), 5-32, 2001. + """ def __init__(self, n_estimators=10, criterion='mse', max_depth=None, min_samples_split=2, min_samples_leaf=1, @@ -224,6 +232,7 @@ def predict(self, X, return_std=False): std : array-like of shape=(n_samples,) Standard deviation of `y` at `X`. If criterion is set to "mse", then `std[i] ~= std(y | X[i])`. + """ mean = super(RandomForestRegressor, self).predict(X) @@ -254,6 +263,7 @@ class ExtraTreesRegressor(_sk_ExtraTreesRegressor): max_features : int, float, string or None, optional (default="auto") The number of features to consider when looking for the best split: + - If int, then consider `max_features` features at each split. - If float, then `max_features` is a percentage and `int(max_features * n_features)` features are considered at each @@ -262,9 +272,11 @@ class ExtraTreesRegressor(_sk_ExtraTreesRegressor): - If "sqrt", then `max_features=sqrt(n_features)`. - If "log2", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. - Note: the search for a split does not stop until at least one - valid partition of the node samples is found, even if it requires to - effectively inspect more than ``max_features`` features. + + .. note:: + The search for a split does not stop until at least one + valid partition of the node samples is found, even if it requires to + effectively inspect more than ``max_features`` features. max_depth : integer or None, optional (default=None) The maximum depth of the tree. If None, then nodes are expanded until @@ -273,6 +285,7 @@ class ExtraTreesRegressor(_sk_ExtraTreesRegressor): min_samples_split : int, float, optional (default=2) The minimum number of samples required to split an internal node: + - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a percentage and `ceil(min_samples_split * n_samples)` are the minimum @@ -280,6 +293,7 @@ class ExtraTreesRegressor(_sk_ExtraTreesRegressor): min_samples_leaf : int, float, optional (default=1) The minimum number of samples required to be at a leaf node: + - If int, then consider `min_samples_leaf` as the minimum number. 
- If float, then `min_samples_leaf` is a percentage and `ceil(min_samples_leaf * n_samples)` are the minimum @@ -301,6 +315,7 @@ class ExtraTreesRegressor(_sk_ExtraTreesRegressor): The weighted impurity decrease equation is the following:: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) + where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. @@ -369,6 +384,7 @@ class ExtraTreesRegressor(_sk_ExtraTreesRegressor): References ---------- .. [1] L. Breiman, "Random Forests", Machine Learning, 45(1), 5-32, 2001. + """ def __init__(self, n_estimators=10, criterion='mse', max_depth=None, min_samples_split=2, min_samples_leaf=1, From c46b4b970bc86e140f26a9467413320ff8cd6f52 Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 25 Feb 2020 17:17:57 +0100 Subject: [PATCH 149/265] Fix whats_new --- doc/whats_new/v0.8.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/whats_new/v0.8.rst b/doc/whats_new/v0.8.rst index 544d59a7c..c6a8cb255 100644 --- a/doc/whats_new/v0.8.rst +++ b/doc/whats_new/v0.8.rst @@ -19,12 +19,12 @@ Version 0.8.0 :mod:`skopt.space` ------------------ -- |Fix| Fix Optimizer for full categorical spaces +- |Fix| Fix Integer transform and inverse_transform for normalize :pr:`880` by :user:`Holger Nahrstaedt ` :mod:`skopt.utils` ------------------ -- |Fix| Fix Integer transform and inverse_transform for normalize +- |Fix| Fix Optimizer for full categorical spaces :pr:`874` by :user:`Holger Nahrstaedt ` Miscellaneous From 706afd61232cc3c18bc24cfab822c35898ffc61b Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 25 Feb 2020 17:20:46 +0100 Subject: [PATCH 150/265] Fix pep8 --- skopt/learning/forest.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/skopt/learning/forest.py b/skopt/learning/forest.py index 467972b8d..991e4f213 100644 --- a/skopt/learning/forest.py +++ b/skopt/learning/forest.py @@ -81,8 +81,9 @@ class RandomForestRegressor(_sk_RandomForestRegressor): .. note:: The search for a split does not stop until at least one - valid partition of the node samples is found, even if it requires to - effectively inspect more than ``max_features`` features. + valid partition of the node samples is found, even if it + requires to effectively inspect more than ``max_features`` + features. max_depth : integer or None, optional (default=None) The maximum depth of the tree. If None, then nodes are expanded until @@ -275,8 +276,9 @@ class ExtraTreesRegressor(_sk_ExtraTreesRegressor): .. note:: The search for a split does not stop until at least one - valid partition of the node samples is found, even if it requires to - effectively inspect more than ``max_features`` features. + valid partition of the node samples is found, even if it + requires to effectively inspect more than ``max_features`` + features. max_depth : integer or None, optional (default=None) The maximum depth of the tree. 
If None, then nodes are expanded until From 3bc825c6ad275990e3110eb9464986fe0f8bd887 Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 25 Feb 2020 17:32:30 +0100 Subject: [PATCH 151/265] Fix doc warning --- skopt/learning/forest.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/skopt/learning/forest.py b/skopt/learning/forest.py index 991e4f213..096770c1d 100644 --- a/skopt/learning/forest.py +++ b/skopt/learning/forest.py @@ -120,6 +120,7 @@ class RandomForestRegressor(_sk_RandomForestRegressor): A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following:: + N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) @@ -315,6 +316,7 @@ class ExtraTreesRegressor(_sk_ExtraTreesRegressor): A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following:: + N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) From c10d934513000a95c096fabef854a939c911bb76 Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 25 Feb 2020 20:26:29 +0100 Subject: [PATCH 152/265] Fix more doc warnings --- skopt/optimizer/base.py | 48 +++++++++++----------- skopt/optimizer/dummy.py | 10 +---- skopt/optimizer/forest.py | 15 +++---- skopt/optimizer/gbrt.py | 12 ++---- skopt/optimizer/gp.py | 77 ++++++++++++------------------------ skopt/optimizer/optimizer.py | 54 +++++++++++++------------ skopt/searchcv.py | 3 +- 7 files changed, 87 insertions(+), 132 deletions(-) diff --git a/skopt/optimizer/base.py b/skopt/optimizer/base.py index d0784ac1c..2d11c2644 100644 --- a/skopt/optimizer/base.py +++ b/skopt/optimizer/base.py @@ -30,6 +30,7 @@ def base_minimize(func, dimensions, base_estimator, callback=None, n_points=10000, n_restarts_optimizer=5, xi=0.01, kappa=1.96, n_jobs=1, model_queue_size=None): """Base optimizer class + Parameters ---------- func : callable @@ -53,8 +54,8 @@ def base_minimize(func, dimensions, base_estimator, - an instance of a `Dimension` object (`Real`, `Integer` or `Categorical`). - NOTE: The upper and lower bounds are inclusive for `Integer` - dimensions. + .. note:: The upper and lower bounds are inclusive for `Integer` + dimensions. base_estimator : sklearn regressor Should inherit from `sklearn.base.RegressorMixin`. @@ -62,13 +63,14 @@ def base_minimize(func, dimensions, base_estimator, which returns `std(Y | x)`` along with `E[Y | x]`. n_calls : int, default=100 - Maximum number of calls to `func`. An objective fucntion will + Maximum number of calls to `func`. An objective function will always be evaluated this number of times; Various options to supply initialization points do not affect this value. n_random_starts : int, default=10 Number of evaluations of `func` with random points before approximating it with `base_estimator`. + .. deprecated:: 0.9 use `n_initial_points` instead. @@ -81,14 +83,14 @@ def base_minimize(func, dimensions, base_estimator, default='random' Sets a initial points generator. 
Can be either - - "random" for uniform random numbers, - - "sobol" for a Sobol sequence, - - "halton" for a Halton sequence, - - "hammersly" for a Hammersly sequence, - - "lhs" for a latin hypercube sequence, - - "grid" for a uniform grid sequence + - `"random"` for uniform random numbers, + - `"sobol"` for a Sobol sequence, + - `"halton"` for a Halton sequence, + - `"hammersly"` for a Hammersly sequence, + - `"lhs"` for a latin hypercube sequence, + - `"grid"` for a uniform grid sequence - acq_func : string, default=`"EI"` + acq_func : string, default="EI" Function to minimize over the posterior distribution. Can be either - `"LCB"` for lower confidence bound, @@ -100,10 +102,10 @@ def base_minimize(func, dimensions, base_estimator, the second being the time taken in seconds. - `"PIps"` for negated probability of improvement per second. The return type of the objective function is assumed to be similar to - that of `"EIps + that of `"EIps"` acq_optimizer : string, `"sampling"` or `"lbfgs"`, default=`"lbfgs"` - Method to minimize the acquistion function. The fit model + Method to minimize the acquisition function. The fit model is updated with the optimal value obtained by optimizing `acq_func` with `acq_optimizer`. @@ -111,11 +113,12 @@ def base_minimize(func, dimensions, base_estimator, `acq_func` at `n_points` randomly sampled points and the smallest value found is used. - If set to `"lbfgs"`, then - - The `n_restarts_optimizer` no. of points which the acquisition - function is least are taken as start points. - - `"lbfgs"` is run for 20 iterations with these points as initial - points to find local minima. - - The optimal of these local minima is used to update the prior. + + - The `n_restarts_optimizer` no. of points which the acquisition + function is least are taken as start points. + - `"lbfgs"` is run for 20 iterations with these points as initial + points to find local minima. + - The optimal of these local minima is used to update the prior. x0 : list, list of lists or `None` Initial input points. @@ -191,22 +194,15 @@ def base_minimize(func, dimensions, base_estimator, Important attributes are: - `x` [list]: location of the minimum. - - `fun` [float]: function value at the minimum. - - `models`: surrogate models used for each iteration. - - `x_iters` [list of lists]: location of function evaluation for each - iteration. - + iteration. - `func_vals` [array]: function value for each iteration. - - `space` [Space]: the optimization space. - - `specs` [dict]`: the call specifications. - - `rng` [RandomState instance]: State of the random state - at the end of minimization. + at the end of minimization. For more details related to the OptimizeResult object, refer http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html diff --git a/skopt/optimizer/dummy.py b/skopt/optimizer/dummy.py index 1e432d1de..9621b4221 100644 --- a/skopt/optimizer/dummy.py +++ b/skopt/optimizer/dummy.py @@ -87,20 +87,14 @@ def dummy_minimize(func, dimensions, n_calls=100, Important attributes are: - `x` [list]: location of the minimum. - - `fun` [float]: function value at the minimum. - - `x_iters` [list of lists]: location of function evaluation for each - iteration. - + iteration. - `func_vals` [array]: function value for each iteration. - - `space` [Space]: the optimisation space. - - `specs` [dict]: the call specifications. - - `rng` [RandomState instance]: State of the random state - at the end of minimization. + at the end of minimization. 
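As a rough usage sketch of the `OptimizeResult` attributes listed above (the toy objective and bound are invented; the `dummy_minimize` call follows the signature documented in this patch)::

    from skopt import dummy_minimize

    res = dummy_minimize(lambda x: (x[0] - 0.3) ** 2,
                         [(-1.0, 1.0)],              # a single Real dimension
                         n_calls=20, random_state=0)
    print(res.x, res.fun)     # location and value of the minimum
    print(len(res.x_iters))   # one evaluated point per call, here 20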
For more details related to the OptimizeResult object, refer http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html diff --git a/skopt/optimizer/forest.py b/skopt/optimizer/forest.py index b734e3589..b6234bd58 100644 --- a/skopt/optimizer/forest.py +++ b/skopt/optimizer/forest.py @@ -42,7 +42,7 @@ def forest_minimize(func, dimensions, base_estimator="ET", n_calls=100, then you can use :func:`skopt.utils.use_named_args` as a decorator on your objective function, in order to call it directly with the named arguments. See :func:`skopt.utils.use_named_args` - for an example. + for an example. dimensions : list, shape (n_dims,) List of search space dimensions. @@ -56,8 +56,8 @@ def forest_minimize(func, dimensions, base_estimator="ET", n_calls=100, - an instance of a `Dimension` object (`Real`, `Integer` or `Categorical`). - NOTE: The upper and lower bounds are inclusive for `Integer` - dimensions. + .. note:: The upper and lower bounds are inclusive for `Integer` + dimensions. base_estimator : string or `Regressor`, default="ET" The regressor to use as surrogate model. Can be either @@ -78,6 +78,7 @@ def forest_minimize(func, dimensions, base_estimator="ET", n_calls=100, n_random_starts : int, default=10 Number of evaluations of `func` with random points before approximating it with `base_estimator`. + .. deprecated:: 0.9 use `n_initial_points` instead. @@ -168,18 +169,12 @@ def forest_minimize(func, dimensions, base_estimator="ET", n_calls=100, Important attributes are: - `x` [list]: location of the minimum. - - `fun` [float]: function value at the minimum. - - `models`: surrogate models used for each iteration. - - `x_iters` [list of lists]: location of function evaluation for each - iteration. - + iteration. - `func_vals` [array]: function value for each iteration. - - `space` [Space]: the optimization space. - - `specs` [dict]`: the call specifications. For more details related to the OptimizeResult object, refer diff --git a/skopt/optimizer/gbrt.py b/skopt/optimizer/gbrt.py index 2196a4d5c..4af57b953 100644 --- a/skopt/optimizer/gbrt.py +++ b/skopt/optimizer/gbrt.py @@ -65,6 +65,7 @@ def gbrt_minimize(func, dimensions, base_estimator=None, n_random_starts : int, default=10 Number of evaluations of `func` with random points before approximating it with `base_estimator`. + .. deprecated:: 0.9 use `n_initial_points` instead. @@ -153,22 +154,15 @@ def gbrt_minimize(func, dimensions, base_estimator=None, Important attributes are: - `x` [list]: location of the minimum. - - `fun` [float]: function value at the minimum. - - `models`: surrogate models used for each iteration. - - `x_iters` [list of lists]: location of function evaluation for each - iteration. - + iteration. - `func_vals` [array]: function value for each iteration. - - `space` [Space]: the optimization space. - - `specs` [dict]`: the call specifications. - - `rng` [RandomState instance]: State of the random state - at the end of minimization. + at the end of minimization. 
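For orientation, a hedged sketch of calling the tree-based minimizers whose docstrings are edited here (the objective, bounds, and parameter values are illustrative only)::

    from skopt import forest_minimize

    res = forest_minimize(lambda x: x[0] ** 2 + x[1],
                          [(-2.0, 2.0), (0, 10)],  # Real and Integer dimensions
                          base_estimator="ET",     # extra-trees surrogate
                          n_calls=20, acq_func="EI",
                          random_state=0)
    print(res.x, res.fun)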
For more details related to the OptimizeResult object, refer http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html diff --git a/skopt/optimizer/gp.py b/skopt/optimizer/gp.py index dac3e3e9f..16ca440ed 100644 --- a/skopt/optimizer/gp.py +++ b/skopt/optimizer/gp.py @@ -61,17 +61,14 @@ def gp_minimize(func, dimensions, base_estimator=None, - a `(lower_bound, upper_bound)` tuple (for `Real` or `Integer` dimensions), - - a `(lower_bound, upper_bound, "prior")` tuple (for `Real` dimensions), - - as a list of categories (for `Categorical` dimensions), or - - an instance of a `Dimension` object (`Real`, `Integer` or `Categorical`). .. note:: The upper and lower bounds are inclusive for `Integer` - dimensions. + dimensions. base_estimator : a Gaussian process estimator The Gaussian process estimator to use for optimization. @@ -79,9 +76,7 @@ def gp_minimize(func, dimensions, base_estimator=None, hyperparameters tuned. - All the length scales of the Matern kernel. - - The covariance amplitude that each element is multiplied with. - - Noise that is added to the matern kernel. The noise is assumed to be iid gaussian. @@ -91,6 +86,7 @@ def gp_minimize(func, dimensions, base_estimator=None, n_random_starts : int, default=10 Number of evaluations of `func` with random points before approximating it with `base_estimator`. + .. deprecated:: 0.9 use `n_initial_points` instead. @@ -103,47 +99,40 @@ def gp_minimize(func, dimensions, base_estimator=None, default='random' Sets a initial points generator. Can be either - - "random" for uniform random numbers, - - "sobol" for a Sobol sequence, - - "halton" for a Halton sequence, - - "hammersly" for a Hammersly sequence, - - "lhs" for a latin hypercube sequence, + - `"random"` for uniform random numbers, + - `"sobol"` for a Sobol sequence, + - `"halton"` for a Halton sequence, + - `"hammersly"` for a Hammersly sequence, + - `"lhs"` for a latin hypercube sequence, acq_func : string, default=`"gp_hedge"` Function to minimize over the gaussian prior. Can be either - `"LCB"` for lower confidence bound. - - `"EI"` for negative expected improvement. - - `"PI"` for negative probability of improvement. - - `"gp_hedge"` Probabilistically choose one of the above three acquisition functions at every iteration. The weightage given to these gains can be set by :math:`\eta` through `acq_func_kwargs`. - - The gains `g_i` are initialized to zero. - - - At every iteration, - - - Each acquisition function is optimised independently to - propose an candidate point `X_i`. - - - Out of all these candidate points, the next point `X_best` is - chosen by :math:`softmax(\eta g_i)` + - The gains `g_i` are initialized to zero. + - At every iteration, - - After fitting the surrogate model with `(X_best, y_best)`, - the gains are updated such that :math:`g_i -= \mu(X_i)` + - Each acquisition function is optimised independently to + propose an candidate point `X_i`. + - Out of all these candidate points, the next point `X_best` is + chosen by :math:`softmax(\eta g_i)` + - After fitting the surrogate model with `(X_best, y_best)`, + the gains are updated such that :math:`g_i -= \mu(X_i)` - `"EIps"` for negated expected improvement per second to take into account the function compute time. Then, the objective function is assumed to return two values, the first being the objective value and the second being the time taken in seconds. - - `"PIps"` for negated probability of improvement per second. 
The return type of the objective function is assumed to be similar to - that of `"EIps + that of `"EIps"` acq_optimizer : string, `"sampling"` or `"lbfgs"`, default=`"lbfgs"` Method to minimize the acquistion function. The fit model @@ -154,28 +143,22 @@ def gp_minimize(func, dimensions, base_estimator=None, - If set to `"auto"`, then `acq_optimizer` is configured on the basis of the space searched over. - If the space is Categorical then this is set to be "sampling"`. - + If the space is Categorical then this is set to be `"sampling"`. - If set to `"sampling"`, then the point among these `n_points` where the `acq_func` is minimum is the next candidate minimum. - - If set to `"lbfgs"`, then - - The `n_restarts_optimizer` no. of points which the acquisition - function is least are taken as start points. - - - `"lbfgs"` is run for 20 iterations with these points as initial - points to find local minima. - - - The optimal of these local minima is used to update the prior. + - The `n_restarts_optimizer` no. of points which the acquisition + function is least are taken as start points. + - `"lbfgs"` is run for 20 iterations with these points as initial + points to find local minima. + - The optimal of these local minima is used to update the prior. x0 : list, list of lists or `None` Initial input points. - If it is a list of lists, use it as a list of input points. - - If it is a list, use it as a single initial input point. - - If it is `None`, no initial input points are used. y0 : list, scalar or `None` @@ -184,10 +167,8 @@ def gp_minimize(func, dimensions, base_estimator=None, - If it is a list, then it corresponds to evaluations of the function at each element of `x0` : the i-th element of `y0` corresponds to the function evaluated at the i-th element of `x0`. - - If it is a scalar, then it corresponds to the evaluation of the function at `x0`. - - If it is None and `x0` is provided, then the function is evaluated at each element of `x0`. @@ -226,10 +207,8 @@ def gp_minimize(func, dimensions, base_estimator=None, - Use noise="gaussian" if the objective returns noisy observations. The noise of each observation is assumed to be iid with mean zero and a fixed variance. - - If the variance is known before-hand, this can be set directly to the variance of the noise. - - Set this to a value close to zero (1e-10) if the function is noise-free. Setting to zero might cause stability issues. @@ -251,28 +230,22 @@ def gp_minimize(func, dimensions, base_estimator=None, Important attributes are: - `x` [list]: location of the minimum. - - `fun` [float]: function value at the minimum. - - `models`: surrogate models used for each iteration. - - `x_iters` [list of lists]: location of function evaluation for each - iteration. - + iteration. - `func_vals` [array]: function value for each iteration. - - `space` [Space]: the optimization space. - - `specs` [dict]`: the call specifications. - - `rng` [RandomState instance]: State of the random state - at the end of minimization. + at the end of minimization. For more details related to the OptimizeResult object, refer http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html .. 
seealso:: functions :class:`skopt.forest_minimize`, :class:`skopt.dummy_minimize` + """ # Check params rng = check_random_state(random_state) diff --git a/skopt/optimizer/optimizer.py b/skopt/optimizer/optimizer.py index bdd929f1e..f1dc58cff 100644 --- a/skopt/optimizer/optimizer.py +++ b/skopt/optimizer/optimizer.py @@ -74,14 +74,14 @@ class Optimizer(object): default='random' Sets a initial points generator. Can be either - - "random" for uniform random numbers, - - "sobol" for a Sobol sequence, - - "halton" for a Halton sequence, - - "hammersly" for a Hammersly sequence, - - "lhs" for a latin hypercube sequence, - - "grid" for a uniform grid sequence - - acq_func : string, default=`"gp_hedge"` + - `"random"` for uniform random numbers, + - `"sobol"` for a Sobol sequence, + - `"halton"` for a Halton sequence, + - `"hammersly"` for a Hammersly sequence, + - `"lhs"` for a latin hypercube sequence, + - `"grid"` for a uniform grid sequence + + acq_func : string, default="gp_hedge" Function to minimize over the posterior distribution. Can be either - `"LCB"` for lower confidence bound. @@ -89,38 +89,42 @@ class Optimizer(object): - `"PI"` for negative probability of improvement. - `"gp_hedge"` Probabilistically choose one of the above three acquisition functions at every iteration. - - The gains `g_i` are initialized to zero. - - At every iteration, - - Each acquisition function is optimised independently to - propose an candidate point `X_i`. - - Out of all these candidate points, the next point `X_best` is - chosen by :math:`softmax(\eta g_i)` - - After fitting the surrogate model with `(X_best, y_best)`, - the gains are updated such that :math:`g_i -= \mu(X_i)` - - `"EIps" for negated expected improvement per second to take into + + - The gains `g_i` are initialized to zero. + - At every iteration, + + - Each acquisition function is optimised independently to + propose an candidate point `X_i`. + - Out of all these candidate points, the next point `X_best` is + chosen by :math:`softmax(\eta g_i)` + - After fitting the surrogate model with `(X_best, y_best)`, + the gains are updated such that :math:`g_i -= \mu(X_i)` + + - `"EIps"` for negated expected improvement per second to take into account the function compute time. Then, the objective function is assumed to return two values, the first being the objective value and the second being the time taken in seconds. - `"PIps"` for negated probability of improvement per second. The return type of the objective function is assumed to be similar to - that of `"EIps + that of `"EIps"` - acq_optimizer : string, `"sampling"` or `"lbfgs"`, default=`"auto"` - Method to minimize the acquistion function. The fit model + acq_optimizer : string, "sampling" or "lbfgs", default="auto" + Method to minimize the acquisition function. The fit model is updated with the optimal value obtained by optimizing `acq_func` with `acq_optimizer`. - If set to `"auto"`, then `acq_optimizer` is configured on the basis of the base_estimator and the space searched over. If the space is Categorical or if the estimator provided based on - tree-models then this is set to be "sampling"`. + tree-models then this is set to be `"sampling"`. - If set to `"sampling"`, then `acq_func` is optimized by computing `acq_func` at `n_points` randomly sampled points. - If set to `"lbfgs"`, then `acq_func` is optimized by - - Sampling `n_restarts_optimizer` points randomly. - - `"lbfgs"` is run for 20 iterations with these points as initial - points to find local minima. 
- - The optimal of these local minima is used to update the prior. + + - Sampling `n_restarts_optimizer` points randomly. + - `"lbfgs"` is run for 20 iterations with these points as initial + points to find local minima. + - The optimal of these local minima is used to update the prior. random_state : int, RandomState instance, or None (default) Set random state to something other than None for reproducible diff --git a/skopt/searchcv.py b/skopt/searchcv.py index ad80c10ca..4b09eace9 100644 --- a/skopt/searchcv.py +++ b/skopt/searchcv.py @@ -110,10 +110,8 @@ class BayesSearchCV(BaseSearchCV): created and spawned. Use this for lightweight and fast-running jobs, to avoid delays due to on-demand spawning of the jobs - - An int, giving the exact number of total jobs that are spawned - - A string, giving an expression as a function of n_jobs, as in '2*n_jobs' @@ -125,6 +123,7 @@ class BayesSearchCV(BaseSearchCV): cv : int, cross-validation generator or an iterable, optional Determines the cross-validation splitting strategy. Possible inputs for cv are: + - None, to use the default 3-fold cross validation, - integer, to specify the number of folds in a `(Stratified)KFold`, - An object to be used as a cross-validation generator. From 9647222ab7b0effde48351076403bae3e3981d67 Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 25 Feb 2020 20:29:55 +0100 Subject: [PATCH 153/265] Fix pep8 --- skopt/optimizer/gp.py | 6 +++--- skopt/optimizer/optimizer.py | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/skopt/optimizer/gp.py b/skopt/optimizer/gp.py index 16ca440ed..2bceb0b50 100644 --- a/skopt/optimizer/gp.py +++ b/skopt/optimizer/gp.py @@ -113,7 +113,7 @@ def gp_minimize(func, dimensions, base_estimator=None, - `"PI"` for negative probability of improvement. - `"gp_hedge"` Probabilistically choose one of the above three acquisition functions at every iteration. The weightage - given to these gains can be set by :math:`\eta` through + given to these gains can be set by :math:`\\eta` through `acq_func_kwargs`. - The gains `g_i` are initialized to zero. @@ -122,9 +122,9 @@ def gp_minimize(func, dimensions, base_estimator=None, - Each acquisition function is optimised independently to propose an candidate point `X_i`. - Out of all these candidate points, the next point `X_best` is - chosen by :math:`softmax(\eta g_i)` + chosen by :math:`softmax(\\eta g_i)` - After fitting the surrogate model with `(X_best, y_best)`, - the gains are updated such that :math:`g_i -= \mu(X_i)` + the gains are updated such that :math:`g_i -= \\mu(X_i)` - `"EIps"` for negated expected improvement per second to take into account the function compute time. Then, the objective function is diff --git a/skopt/optimizer/optimizer.py b/skopt/optimizer/optimizer.py index f1dc58cff..170a0b0da 100644 --- a/skopt/optimizer/optimizer.py +++ b/skopt/optimizer/optimizer.py @@ -52,8 +52,8 @@ class Optimizer(object): - an instance of a `Dimension` object (`Real`, `Integer` or `Categorical`). - base_estimator : `"GP"`, `"RF"`, `"ET"`, `"GBRT"` or sklearn regressor, \ - default=`"GP"` + base_estimator : "GP", "RF", "ET", "GBRT" or sklearn regressor, \ + default="GP" Should inherit from :obj:`sklearn.base.RegressorMixin`. In addition the `predict` method, should have an optional `return_std` argument, which returns `std(Y | x)`` along with `E[Y | x]`. @@ -96,9 +96,9 @@ class Optimizer(object): - Each acquisition function is optimised independently to propose an candidate point `X_i`. 
- Out of all these candidate points, the next point `X_best` is - chosen by :math:`softmax(\eta g_i)` + chosen by :math:`softmax(\\eta g_i)` - After fitting the surrogate model with `(X_best, y_best)`, - the gains are updated such that :math:`g_i -= \mu(X_i)` + the gains are updated such that :math:`g_i -= \\mu(X_i)` - `"EIps"` for negated expected improvement per second to take into account the function compute time. Then, the objective function is From 1ff87036c38ab22ae9474e62073449d49934eb81 Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 25 Feb 2020 21:04:49 +0100 Subject: [PATCH 154/265] Improve doc for optimizer --- skopt/optimizer/base.py | 30 +++++++++++++-------------- skopt/optimizer/dummy.py | 23 ++++++++++++--------- skopt/optimizer/forest.py | 40 ++++++++++++++++++------------------ skopt/optimizer/gbrt.py | 39 +++++++++++++++++++---------------- skopt/optimizer/gp.py | 34 +++++++++++++++--------------- skopt/optimizer/optimizer.py | 30 +++++++++++++-------------- 6 files changed, 101 insertions(+), 95 deletions(-) diff --git a/skopt/optimizer/base.py b/skopt/optimizer/base.py index 2d11c2644..b1821c018 100644 --- a/skopt/optimizer/base.py +++ b/skopt/optimizer/base.py @@ -60,27 +60,27 @@ def base_minimize(func, dimensions, base_estimator, base_estimator : sklearn regressor Should inherit from `sklearn.base.RegressorMixin`. In addition, should have an optional `return_std` argument, - which returns `std(Y | x)`` along with `E[Y | x]`. + which returns `std(Y | x)` along with `E[Y | x]`. - n_calls : int, default=100 + n_calls : int, default: 100 Maximum number of calls to `func`. An objective function will always be evaluated this number of times; Various options to supply initialization points do not affect this value. - n_random_starts : int, default=10 + n_random_starts : int, default: None Number of evaluations of `func` with random points before approximating it with `base_estimator`. - .. deprecated:: 0.9 + .. deprecated:: 0.8 use `n_initial_points` instead. - n_initial_points : int, default=10 + n_initial_points : int, default: 10 Number of evaluations of `func` with initialization points before approximating it with `base_estimator`. Initial point generator can be changed by setting `initial_point_generator`. initial_point_generator : str, InitialPointGenerator instance, \ - default='random' + default: `"random"` Sets a initial points generator. Can be either - `"random"` for uniform random numbers, @@ -90,7 +90,7 @@ def base_minimize(func, dimensions, base_estimator, - `"lhs"` for a latin hypercube sequence, - `"grid"` for a uniform grid sequence - acq_func : string, default="EI" + acq_func : string, default: `"EI"` Function to minimize over the posterior distribution. Can be either - `"LCB"` for lower confidence bound, @@ -104,7 +104,7 @@ def base_minimize(func, dimensions, base_estimator, return type of the objective function is assumed to be similar to that of `"EIps"` - acq_optimizer : string, `"sampling"` or `"lbfgs"`, default=`"lbfgs"` + acq_optimizer : string, `"sampling"` or `"lbfgs"`, default: `"lbfgs"` Method to minimize the acquisition function. The fit model is updated with the optimal value obtained by optimizing `acq_func` with `acq_optimizer`. @@ -149,7 +149,7 @@ def base_minimize(func, dimensions, base_estimator, Set random state to something other than None for reproducible results. - verbose : boolean, default=False + verbose : boolean, default: False Control the verbosity. It is advised to set the verbosity to True for long optimization runs. 
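Putting the initialization arguments documented above together, a minimal sketch (using `gp_minimize`, which passes these options through to `base_minimize`; the objective and values are invented)::

    from skopt import gp_minimize

    res = gp_minimize(lambda x: x[0] ** 2,
                      [(-2.0, 2.0)],
                      n_calls=15,
                      n_initial_points=5,             # preferred over n_random_starts
                      initial_point_generator="lhs",  # latin hypercube initialization
                      acq_func="EI",
                      random_state=0)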
@@ -157,33 +157,33 @@ def base_minimize(func, dimensions, base_estimator, If callable then `callback(res)` is called after each call to `func`. If list of callables, then each callable in the list is called. - n_points : int, default=10000 + n_points : int, default: 10000 If `acq_optimizer` is set to `"sampling"`, then `acq_func` is optimized by computing `acq_func` at `n_points` randomly sampled points. - n_restarts_optimizer : int, default=5 + n_restarts_optimizer : int, default: 5 The number of restarts of the optimizer when `acq_optimizer` is `"lbfgs"`. - xi : float, default=0.01 + xi : float, default: 0.01 Controls how much improvement one wants over the previous best values. Used when the acquisition is either `"EI"` or `"PI"`. - kappa : float, default=1.96 + kappa : float, default: 1.96 Controls how much of the variance in the predicted values should be taken into account. If set to be very high, then we are favouring exploration over exploitation and vice versa. Used when the acquisition is `"LCB"`. - n_jobs : int, default=1 + n_jobs : int, default: 1 Number of cores to run in parallel while running the lbfgs optimizations over the acquisition function. Valid only when `acq_optimizer` is set to "lbfgs." Defaults to 1 core. If `n_jobs=-1`, then number of jobs is set to number of cores. - model_queue_size : int or None, default=None + model_queue_size : int or None, default: None Keeps list of models only as long as the argument given. In the case of None, the list has no capped length. diff --git a/skopt/optimizer/dummy.py b/skopt/optimizer/dummy.py index 9621b4221..71902ee97 100644 --- a/skopt/optimizer/dummy.py +++ b/skopt/optimizer/dummy.py @@ -32,19 +32,19 @@ def dummy_minimize(func, dimensions, n_calls=100, - an instance of a `Dimension` object (`Real`, `Integer` or `Categorical`). - n_calls : int, default=100 + n_calls : int, default: 100 Number of calls to `func` to find the minimum. initial_point_generator : str, InitialPointGenerator instance, \ - default='random' + default: `"random"` Sets a initial points generator. Can be either - - "random" for uniform random numbers, - - "sobol" for a Sobol sequence, - - "halton" for a Halton sequence, - - "hammersly" for a Hammersly sequence, - - "lhs" for a latin hypercube sequence, - - "grid" for a uniform grid sequence + - `"random"` for uniform random numbers, + - `"sobol"` for a Sobol sequence, + - `"halton"` for a Halton sequence, + - `"hammersly"` for a Hammersly sequence, + - `"lhs"` for a latin hypercube sequence, + - `"grid"` for a uniform grid sequence x0 : list, list of lists or `None` Initial input points. @@ -68,7 +68,7 @@ def dummy_minimize(func, dimensions, n_calls=100, Set random state to something other than None for reproducible results. - verbose : boolean, default=False + verbose : boolean, default: False Control the verbosity. It is advised to set the verbosity to True for long optimization runs. @@ -76,7 +76,7 @@ def dummy_minimize(func, dimensions, n_calls=100, If callable then `callback(res)` is called after each call to `func`. If list of callables, then each callable in the list is called. - model_queue_size : int or None, default=None + model_queue_size : int or None, default: None Keeps list of models only as long as the argument given. In the case of None, the list has no capped length. @@ -98,6 +98,9 @@ def dummy_minimize(func, dimensions, n_calls=100, For more details related to the OptimizeResult object, refer http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html + + .. 
seealso:: functions :class:`skopt.gp_minimize`, + :class:`skopt.forest_minimize`, :class:`skopt.gbrt_minimize` """ # all our calls want random suggestions, except if we need to evaluate # some initial points diff --git a/skopt/optimizer/forest.py b/skopt/optimizer/forest.py index b6234bd58..f1722ec17 100644 --- a/skopt/optimizer/forest.py +++ b/skopt/optimizer/forest.py @@ -59,7 +59,7 @@ def forest_minimize(func, dimensions, base_estimator="ET", n_calls=100, .. note:: The upper and lower bounds are inclusive for `Integer` dimensions. - base_estimator : string or `Regressor`, default="ET" + base_estimator : string or `Regressor`, default: `"ET"` The regressor to use as surrogate model. Can be either - `"RF"` for random forest regressor @@ -72,33 +72,33 @@ def forest_minimize(func, dimensions, base_estimator="ET", n_calls=100, a regressor which returns the mean and standard deviation when making predictions. - n_calls : int, default=100 + n_calls : int, default: 100 Number of calls to `func`. - n_random_starts : int, default=10 + n_random_starts : int, default: None Number of evaluations of `func` with random points before approximating it with `base_estimator`. - .. deprecated:: 0.9 + .. deprecated:: 0.8 use `n_initial_points` instead. - n_initial_points : int, default=10 + n_initial_points : int, default: 10 Number of evaluations of `func` with initialization points before approximating it with `base_estimator`. Initial point generator can be changed by setting `initial_point_generator`. initial_point_generator : str, InitialPointGenerator instance, \ - default='random' + default: `"random"` Sets a initial points generator. Can be either - - "random" for uniform random numbers, - - "sobol" for a Sobol sequence, - - "halton" for a Halton sequence, - - "hammersly" for a Hammersly sequence, - - "lhs" for a latin hypercube sequence, - - "grid" for a uniform grid sequence + - `"random"` for uniform random numbers, + - `"sobol"` for a Sobol sequence, + - `"halton"` for a Halton sequence, + - `"hammersly"` for a Hammersly sequence, + - `"lhs"` for a latin hypercube sequence, + - `"grid"` for a uniform grid sequence - acq_func : string, default="LCB" + acq_func : string, default: `"LCB"` Function to minimize over the forest posterior. Can be either - `"LCB"` for lower confidence bound. @@ -134,31 +134,31 @@ def forest_minimize(func, dimensions, base_estimator="ET", n_calls=100, Set random state to something other than None for reproducible results. - verbose : boolean, default=False + verbose : boolean, default: False Control the verbosity. It is advised to set the verbosity to True for long optimization runs. callback : callable, optional If provided, then `callback(res)` is called after call to func. - n_points : int, default=10000 + n_points : int, default: 10000 Number of points to sample when minimizing the acquisition function. - xi : float, default=0.01 + xi : float, default: 0.01 Controls how much improvement one wants over the previous best values. Used when the acquisition is either `"EI"` or `"PI"`. - kappa : float, default=1.96 + kappa : float, default: 1.96 Controls how much of the variance in the predicted values should be taken into account. If set to be very high, then we are favouring exploration over exploitation and vice versa. Used when the acquisition is `"LCB"`. - n_jobs : int, default=1 + n_jobs : int, default: 1 The number of jobs to run in parallel for `fit` and `predict`. If -1, then the number of jobs is set to the number of cores. 
- model_queue_size : int or None, default=None + model_queue_size : int or None, default: None Keeps list of models only as long as the argument given. In the case of None, the list has no capped length. @@ -181,7 +181,7 @@ def forest_minimize(func, dimensions, base_estimator="ET", n_calls=100, http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html .. seealso:: functions :class:`skopt.gp_minimize`, - :class:`skopt.dummy_minimize` + :class:`skopt.dummy_minimize`, :class:`skopt.gbrt_minimize` """ return base_minimize(func, dimensions, base_estimator, n_calls=n_calls, n_points=n_points, diff --git a/skopt/optimizer/gbrt.py b/skopt/optimizer/gbrt.py index 4af57b953..bd11aa854 100644 --- a/skopt/optimizer/gbrt.py +++ b/skopt/optimizer/gbrt.py @@ -59,33 +59,33 @@ def gbrt_minimize(func, dimensions, base_estimator=None, base_estimator : `GradientBoostingQuantileRegressor` The regressor to use as surrogate model - n_calls : int, default=100 + n_calls : int, default: 100 Number of calls to `func`. - n_random_starts : int, default=10 + n_random_starts : int, default: None Number of evaluations of `func` with random points before approximating it with `base_estimator`. - .. deprecated:: 0.9 + .. deprecated:: 0.8 use `n_initial_points` instead. - n_initial_points : int, default=10 + n_initial_points : int, default: 10 Number of evaluations of `func` with initialization points before approximating it with `base_estimator`. Initial point generator can be changed by setting `initial_point_generator`. initial_point_generator : str, InitialPointGenerator instance, \ - default='random' + default: `"random"` Sets a initial points generator. Can be either - - "random" for uniform random numbers, - - "sobol" for a Sobol sequence, - - "halton" for a Halton sequence, - - "hammersly" for a Hammersly sequence, - - "lhs" for a latin hypercube sequence, - - "grid" for a uniform grid sequence + - `"random"` for uniform random numbers, + - `"sobol"` for a Sobol sequence, + - `"halton"` for a Halton sequence, + - `"hammersly"` for a Hammersly sequence, + - `"lhs"` for a latin hypercube sequence, + - `"grid"` for a uniform grid sequence - acq_func : string, default=`"LCB"` + acq_func : string, default: `"LCB"` Function to minimize over the forest posterior. Can be either - `"LCB"` for lower confidence bound. @@ -119,31 +119,31 @@ def gbrt_minimize(func, dimensions, base_estimator=None, Set random state to something other than None for reproducible results. - verbose : boolean, default=False + verbose : boolean, default: False Control the verbosity. It is advised to set the verbosity to True for long optimization runs. callback : callable, optional If provided, then `callback(res)` is called after call to func. - n_points : int, default=10000 + n_points : int, default: 10000 Number of points to sample when minimizing the acquisition function. - xi : float, default=0.01 + xi : float, default: 0.01 Controls how much improvement one wants over the previous best values. Used when the acquisition is either `"EI"` or `"PI"`. - kappa : float, default=1.96 + kappa : float, default: 1.96 Controls how much of the variance in the predicted values should be taken into account. If set to be very high, then we are favouring exploration over exploitation and vice versa. Used when the acquisition is `"LCB"`. - n_jobs : int, default=1 + n_jobs : int, default: 1 The number of jobs to run in parallel for `fit` and `predict`. If -1, then the number of jobs is set to the number of cores. 
- model_queue_size : int or None, default=None + model_queue_size : int or None, default: None Keeps list of models only as long as the argument given. In the case of None, the list has no capped length. @@ -166,6 +166,9 @@ def gbrt_minimize(func, dimensions, base_estimator=None, For more details related to the OptimizeResult object, refer http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html + + .. seealso:: functions :class:`skopt.gp_minimize`, + :class:`skopt.dummy_minimize`, :class:`skopt.forest_minimize` """ # Check params rng = check_random_state(random_state) diff --git a/skopt/optimizer/gp.py b/skopt/optimizer/gp.py index 2bceb0b50..e91726026 100644 --- a/skopt/optimizer/gp.py +++ b/skopt/optimizer/gp.py @@ -80,23 +80,23 @@ def gp_minimize(func, dimensions, base_estimator=None, - Noise that is added to the matern kernel. The noise is assumed to be iid gaussian. - n_calls : int, default=100 + n_calls : int, default: 100 Number of calls to `func`. - n_random_starts : int, default=10 + n_random_starts : int, default: None Number of evaluations of `func` with random points before approximating it with `base_estimator`. - .. deprecated:: 0.9 + .. deprecated:: 0.8 use `n_initial_points` instead. - n_initial_points : int, default=10 + n_initial_points : int, default: 10 Number of evaluations of `func` with initialization points before approximating it with `base_estimator`. Initial point generator can be changed by setting `initial_point_generator`. initial_point_generator : str, InitialPointGenerator instance, \ - default='random' + default: 'random' Sets a initial points generator. Can be either - `"random"` for uniform random numbers, @@ -105,7 +105,7 @@ def gp_minimize(func, dimensions, base_estimator=None, - `"hammersly"` for a Hammersly sequence, - `"lhs"` for a latin hypercube sequence, - acq_func : string, default=`"gp_hedge"` + acq_func : string, default: `"gp_hedge"` Function to minimize over the gaussian prior. Can be either - `"LCB"` for lower confidence bound. @@ -134,7 +134,7 @@ def gp_minimize(func, dimensions, base_estimator=None, return type of the objective function is assumed to be similar to that of `"EIps"` - acq_optimizer : string, `"sampling"` or `"lbfgs"`, default=`"lbfgs"` + acq_optimizer : string, `"sampling"` or `"lbfgs"`, default: `"lbfgs"` Method to minimize the acquistion function. The fit model is updated with the optimal value obtained by optimizing `acq_func` with `acq_optimizer`. @@ -176,7 +176,7 @@ def gp_minimize(func, dimensions, base_estimator=None, Set random state to something other than None for reproducible results. - verbose : boolean, default=False + verbose : boolean, default: False Control the verbosity. It is advised to set the verbosity to True for long optimization runs. @@ -184,25 +184,25 @@ def gp_minimize(func, dimensions, base_estimator=None, If callable then `callback(res)` is called after each call to `func`. If list of callables, then each callable in the list is called. - n_points : int, default=10000 + n_points : int, default: 10000 Number of points to sample to determine the next "best" point. Useless if acq_optimizer is set to `"lbfgs"`. - n_restarts_optimizer : int, default=5 + n_restarts_optimizer : int, default: 5 The number of restarts of the optimizer when `acq_optimizer` is `"lbfgs"`. - kappa : float, default=1.96 + kappa : float, default: 1.96 Controls how much of the variance in the predicted values should be taken into account. 
If set to be very high, then we are favouring exploration over exploitation and vice versa. Used when the acquisition is `"LCB"`. - xi : float, default=0.01 + xi : float, default: 0.01 Controls how much improvement one wants over the previous best values. Used when the acquisition is either `"EI"` or `"PI"`. - noise : float, default="gaussian" + noise : float, default: "gaussian" - Use noise="gaussian" if the objective returns noisy observations. The noise of each observation is assumed to be iid with @@ -212,14 +212,14 @@ def gp_minimize(func, dimensions, base_estimator=None, - Set this to a value close to zero (1e-10) if the function is noise-free. Setting to zero might cause stability issues. - n_jobs : int, default=1 + n_jobs : int, default: 1 Number of cores to run in parallel while running the lbfgs optimizations over the acquisition function. Valid only - when `acq_optimizer` is set to "lbfgs." + when `acq_optimizer` is set to `"lbfgs"`. Defaults to 1 core. If `n_jobs=-1`, then number of jobs is set to number of cores. - model_queue_size : int or None, default=None + model_queue_size : int or None, default: None Keeps list of models only as long as the argument given. In the case of None, the list has no capped length. @@ -244,7 +244,7 @@ def gp_minimize(func, dimensions, base_estimator=None, http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html .. seealso:: functions :class:`skopt.forest_minimize`, - :class:`skopt.dummy_minimize` + :class:`skopt.dummy_minimize`, :class:`skopt.gbrt_minimize` """ # Check params diff --git a/skopt/optimizer/optimizer.py b/skopt/optimizer/optimizer.py index 170a0b0da..d5cae1dd3 100644 --- a/skopt/optimizer/optimizer.py +++ b/skopt/optimizer/optimizer.py @@ -52,26 +52,26 @@ class Optimizer(object): - an instance of a `Dimension` object (`Real`, `Integer` or `Categorical`). - base_estimator : "GP", "RF", "ET", "GBRT" or sklearn regressor, \ - default="GP" + base_estimator : `"GP"`, `"RF"`, `"ET"`, `"GBRT"` or sklearn regressor, \ + default: `"GP"` Should inherit from :obj:`sklearn.base.RegressorMixin`. In addition the `predict` method, should have an optional `return_std` - argument, which returns `std(Y | x)`` along with `E[Y | x]`. + argument, which returns `std(Y | x)` along with `E[Y | x]`. If base_estimator is one of ["GP", "RF", "ET", "GBRT"], a default surrogate model of the corresponding type is used corresponding to what is used in the minimize functions. - n_random_starts : int, default=10 - .. deprecated:: 0.9 + n_random_starts : int, default: 10 + .. deprecated:: 0.6 use `n_initial_points` instead. - n_initial_points : int, default=10 + n_initial_points : int, default: 10 Number of evaluations of `func` with initialization points before approximating it with `base_estimator`. Initial point generator can be changed by setting `initial_point_generator`. initial_point_generator : str, InitialPointGenerator instance, \ - default='random' + default: `"random"` Sets a initial points generator. Can be either - `"random"` for uniform random numbers, @@ -81,7 +81,7 @@ class Optimizer(object): - `"lhs"` for a latin hypercube sequence, - `"grid"` for a uniform grid sequence - acq_func : string, default="gp_hedge" + acq_func : string, default: `"gp_hedge"` Function to minimize over the posterior distribution. Can be either - `"LCB"` for lower confidence bound. 
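A minimal ask/tell loop for the `Optimizer` class whose docstring is reworked in this hunk (the objective and parameter values are illustrative; the methods are as documented in this diff)::

    from skopt import Optimizer

    opt = Optimizer([(-2.0, 2.0)],
                    base_estimator="GP",
                    n_initial_points=5,
                    acq_func="EI",
                    acq_optimizer="sampling",
                    random_state=0)
    for _ in range(10):
        x = opt.ask()                    # next point to evaluate
        opt.tell(x, (x[0] - 0.5) ** 2)   # report the observed objective value
    print(min(opt.yi))                   # best objective value seen so far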
@@ -108,7 +108,7 @@ class Optimizer(object): return type of the objective function is assumed to be similar to that of `"EIps"` - acq_optimizer : string, "sampling" or "lbfgs", default="auto" + acq_optimizer : string, `"sampling"` or `"lbfgs"`, default: `"auto"` Method to minimize the acquisition function. The fit model is updated with the optimal value obtained by optimizing `acq_func` with `acq_optimizer`. @@ -131,12 +131,12 @@ results. acq_func_kwargs : dict - Additional arguments to be passed to the acquistion function. + Additional arguments to be passed to the acquisition function. acq_optimizer_kwargs : dict - Additional arguments to be passed to the acquistion optimizer. + Additional arguments to be passed to the acquisition optimizer. - model_queue_size : int or None, default=None + model_queue_size : int or None, default: None Keeps list of models only as long as the argument given. In the case of None, the list has no capped length. @@ -329,7 +329,7 @@ def copy(self, random_state=None): def ask(self, n_points=None, strategy="cl_min"): """Query point or multiple points at which objective should be evaluated. - n_points : int or None, default=None + n_points : int or None, default: None Number of points returned by the ask method. If the value is None, a single point to evaluate is returned. Otherwise a list of points to evaluate is returned of size @@ -337,7 +337,7 @@ parallel, and thus obtain more objective function evaluations per unit of time. - strategy : string, default="cl_min" + strategy : string, default: "cl_min" Method to use to sample multiple points (see also `n_points` description). This parameter is ignored if n_points = None. Supported options are `"cl_min"`, `"cl_mean"` or `"cl_max"`. @@ -468,7 +468,7 @@ def tell(self, x, y, fit=True): y : scalar or list Value of objective at `x`. - fit : bool, default=True + fit : bool, default: True Fit a model to observed evaluations of the objective. A model will only be fitted after `n_initial_points` points have been told to the optimizer irrespective of the value of `fit`. From 39efc4a95c411aed5469a99aa85a1383652c96eb Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 25 Feb 2020 22:33:05 +0100 Subject: [PATCH 155/265] Add property, docstring and unit tests * Optimizer results are returned by the optimizer_results_ property --- skopt/searchcv.py | 14 +++++++++++--- skopt/tests/test_searchcv.py | 16 ++++++++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/skopt/searchcv.py b/skopt/searchcv.py index f8c6ff99c..02a207ddd 100644 --- a/skopt/searchcv.py +++ b/skopt/searchcv.py @@ -240,6 +240,10 @@ class BayesSearchCV(BaseSearchCV): which gave highest score (or smallest loss if specified) on the left out data. Not available if refit=False. + optimizer_results_ : list of `OptimizeResult` + Contains an `OptimizeResult` for each search space. The search space + parameters are sorted by name. + best_score_ : float Score of best_estimator on the left out data.
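A quick usage sketch of the new attribute added in this patch; the dataset, estimator and search space below are invented for illustration and are not part of the patch itself:

    from sklearn.datasets import load_iris
    from sklearn.svm import SVC
    from skopt import BayesSearchCV
    from skopt.space import Real

    X, y = load_iris(return_X_y=True)
    opt = BayesSearchCV(SVC(), {'C': Real(1e-3, 1e3, prior='log-uniform')},
                        n_iter=8, cv=3, random_state=0)
    opt.fit(X, y)

    # One OptimizeResult per search space; within each result the entries
    # of res.x follow the alphabetically sorted parameter names.
    res = opt.optimizer_results_[0]
    print(res.fun, res.x)
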
@@ -370,6 +374,11 @@ def best_params_(self): check_is_fitted(self, 'cv_results_') return self.cv_results_['params'][self.best_index_] + @property + def optimizer_results_(self): + check_is_fitted(self, '_optim_results') + return self._optim_results + # copied for compatibility with 0.19 sklearn from 0.18 BaseSearchCV def _fit(self, X, y, groups, parameter_iterable): """ @@ -650,7 +659,7 @@ def fit(self, X, y=None, groups=None, callback=None): self.cv_results_ = defaultdict(list) self.best_index_ = None self.multimetric_ = False - self.optimizer_results_ = {} + self._optim_results = [] n_points = self.n_points @@ -675,8 +684,7 @@ def fit(self, X, y=None, groups=None, callback=None): if eval_callbacks(callbacks, optim_result): break - - self.optimizer_results_[optimizer] = optim_result + self._optim_results.append(optim_result) # Refit the best model on the the whole dataset if self.refit: diff --git a/skopt/tests/test_searchcv.py b/skopt/tests/test_searchcv.py index 1df759d1d..2c87f0ba5 100644 --- a/skopt/tests/test_searchcv.py +++ b/skopt/tests/test_searchcv.py @@ -13,6 +13,7 @@ from sklearn.base import clone from sklearn.base import BaseEstimator from joblib import cpu_count +from collections import OrderedDict from skopt.space import Real, Categorical, Integer from skopt import BayesSearchCV @@ -167,6 +168,10 @@ def test_searchcv_runs_multiple_subspaces(): # test if all subspaces are explored total_evaluations = len(opt.cv_results_['mean_test_score']) assert total_evaluations == 1+1+2, "Not all spaces were explored!" + assert len(opt.optimizer_results_) == 3 + assert isinstance(opt.optimizer_results_[0].x[0], LinearSVC) + assert isinstance(opt.optimizer_results_[1].x[0], DecisionTreeClassifier) + assert isinstance(opt.optimizer_results_[2].x[0], SVC) def test_searchcv_sklearn_compatibility(): @@ -259,14 +264,25 @@ def test_searchcv_reproducibility(): opt.fit(X_train, y_train) best_est = opt.best_estimator_ + optim_res = opt.optimizer_results_[0].x opt2 = clone(opt).fit(X_train, y_train) best_est2 = opt2.best_estimator_ + optim_res2 = opt2.optimizer_results_[0].x assert getattr(best_est, 'C') == getattr(best_est2, 'C') assert getattr(best_est, 'gamma') == getattr(best_est2, 'gamma') assert getattr(best_est, 'degree') == getattr(best_est2, 'degree') assert getattr(best_est, 'kernel') == getattr(best_est2, 'kernel') + # dict is sorted by alphabet + assert optim_res[0] == getattr(best_est, 'C') + assert optim_res[2] == getattr(best_est, 'gamma') + assert optim_res[1] == getattr(best_est, 'degree') + assert optim_res[3] == getattr(best_est, 'kernel') + assert optim_res2[0] == getattr(best_est, 'C') + assert optim_res2[2] == getattr(best_est, 'gamma') + assert optim_res2[1] == getattr(best_est, 'degree') + assert optim_res2[3] == getattr(best_est, 'kernel') def test_searchcv_refit(): From 0a15f3cb12b57305bd1b2c401cd40c4fcd0af75e Mon Sep 17 00:00:00 2001 From: holgern Date: Tue, 25 Feb 2020 22:37:26 +0100 Subject: [PATCH 156/265] Remove include --- skopt/tests/test_searchcv.py | 1 - 1 file changed, 1 deletion(-) diff --git a/skopt/tests/test_searchcv.py b/skopt/tests/test_searchcv.py index 2c87f0ba5..0ee2e5b07 100644 --- a/skopt/tests/test_searchcv.py +++ b/skopt/tests/test_searchcv.py @@ -13,7 +13,6 @@ from sklearn.base import clone from sklearn.base import BaseEstimator from joblib import cpu_count -from collections import OrderedDict from skopt.space import Real, Categorical, Integer from skopt import BayesSearchCV From a06777bda2f84b2b2b6757cb6acbe55adbd5c531 Mon Sep 17 00:00:00 2001 From: 
holgern Date: Wed, 26 Feb 2020 12:11:41 +0100 Subject: [PATCH 157/265] Improving plots so that optimizer results from BayesSearchCV can be plotted * Extend example with plot * Add constant property to dimension * Skip Constant dimensions in plot_objective --- examples/sklearn-gridsearchcv-replacement.py | 9 +++++ skopt/plots.py | 37 ++++++++++++++------ skopt/space/space.py | 27 ++++++++++++++ 3 files changed, 63 insertions(+), 10 deletions(-) diff --git a/examples/sklearn-gridsearchcv-replacement.py b/examples/sklearn-gridsearchcv-replacement.py index 2fd181bc9..5413390ff 100644 --- a/examples/sklearn-gridsearchcv-replacement.py +++ b/examples/sklearn-gridsearchcv-replacement.py @@ -35,6 +35,8 @@ """ print(__doc__) import numpy as np +np.random.seed(123) +import matplotlib.pyplot as plt ############################################################################# # Minimal example @@ -122,6 +124,13 @@ print("val. score: %s" % opt.best_score_) print("test score: %s" % opt.score(X_test, y_test)) +print("best params: %s" % str(opt.best_params_)) + +############################################################################# +from skopt.plots import plot_objective + +plot_objective(opt.optimizer_results_[0]) +plt.show() ############################################################################# # Progress monitoring and control using `callback` argument of `fit` method diff --git a/skopt/plots.py b/skopt/plots.py index 2879492d4..df6fb4b0c 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -6,7 +6,7 @@ from scipy.optimize import OptimizeResult from skopt import expected_minimum, expected_minimum_random_sampling -from .space import Categorical +from .space import Categorical, Space # For plot tests, matplotlib must be set to headless mode early if 'pytest' in sys.modules: @@ -525,22 +525,36 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, raise ValueError("Valid values for zscale are 'linear' and 'log'," " not '%s'." % zscale) - fig, ax = plt.subplots(space.n_dims, space.n_dims, - figsize=(size * space.n_dims, size * space.n_dims)) + n_dims = space.n_dims - space.n_constant_dimensions + dim_without_constant = [] + for dim in space.dimensions: + if dim.is_constant: + continue + dim_without_constant.append(dim) + space_without_constant = Space(dim_without_constant) + fig, ax = plt.subplots(n_dims, n_dims, + figsize=(size * n_dims, size * n_dims)) fig.subplots_adjust(left=0.05, right=0.95, bottom=0.05, top=0.95, hspace=0.1, wspace=0.1) + ax_i = 0 + for i in range(space.n_dims): + if space.dimensions[i].is_constant: + continue + ax_j = 0 for j in range(space.n_dims): + if space.dimensions[j].is_constant: + continue if i == j: xi, yi = partial_dependence(space, result.models[-1], i, j=None, sample_points=rvs_transformed, n_points=n_points, x_eval=x_eval) - ax[i, i].plot(xi, yi) - ax[i, i].axvline(minimum[i], linestyle="--", color="r", lw=1) + ax[ax_i, ax_i].plot(xi, yi) + ax[ax_i, ax_i].axvline(minimum[i], linestyle="--", color="r", lw=1) # lower triangle elif i > j: @@ -548,14 +562,17 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, i, j, rvs_transformed, n_points, x_eval=x_eval) - ax[i, j].contourf(xi, yi, zi, levels, - locator=locator, cmap='viridis_r') - ax[i, j].scatter(samples[:, j], samples[:, i], + if np.min(zi.shape) > 1: + ax[ax_i, ax_j].contourf(xi, yi, zi, levels, + locator=locator, cmap='viridis_r') + ax[ax_i, ax_j].scatter(samples[:, j], samples[:, i], c='k', s=10, lw=0.)
- ax[i, j].scatter(minimum[j], minimum[i], + ax[ax_i, ax_j].scatter(minimum[j], minimum[i], c=['r'], s=20, lw=0.) + ax_j += 1 + ax_i += 1 ylabel = "Partial dependence" - return _format_scatter_plot_axes(ax, space, ylabel=ylabel, + return _format_scatter_plot_axes(ax, space_without_constant, ylabel=ylabel, dim_labels=dimensions) diff --git a/skopt/space/space.py b/skopt/space/space.py index 1b7f49ea3..fd1156e8f 100644 --- a/skopt/space/space.py +++ b/skopt/space/space.py @@ -166,6 +166,10 @@ def transformed_size(self): def bounds(self): raise NotImplementedError + @property + def is_constant(self): + raise NotImplementedError + @property def transformed_bounds(self): raise NotImplementedError @@ -329,6 +333,10 @@ def inverse_transform(self, Xt): def bounds(self): return (self.low, self.high) + @property + def is_constant(self): + return self.low == self.high + def __contains__(self, point): if isinstance(point, list): point = np.array(point) @@ -504,6 +512,10 @@ def inverse_transform(self, Xt): def bounds(self): return (self.low, self.high) + @property + def is_constant(self): + return self.low == self.high + def __contains__(self, point): if isinstance(point, list): point = np.array(point) @@ -669,6 +681,10 @@ def transformed_size(self): def bounds(self): return self.categories + @property + def is_constant(self): + return len(self.categories) <= 1 + def __contains__(self, point): return point in self.categories @@ -1018,6 +1034,17 @@ def is_partly_categorical(self): """Space contains any categorical dimensions""" return any([isinstance(dim, Categorical) for dim in self.dimensions]) + @property + def n_constant_dimensions(self): + """Returns the number of constant dimensions which have zero degrees of + freedom, e.g. an Integer dimension with (0., 0.) as bounds. + """ + n = 0 + for dim in self.dimensions: + if dim.is_constant: + n += 1 + return n + def distance(self, point_a, point_b): """Compute distance between two points in this space. From fac41e3f175b3ac65ed11e99654de0f48c432636 Mon Sep 17 00:00:00 2001 From: holgern Date: Wed, 26 Feb 2020 13:05:02 +0100 Subject: [PATCH 158/265] Added n_jobs support to optimizer. As PR #627 was deleted, its changes were added here again. * docstring and unittest added * Missing n_jobs in forest_minimize has been added --- skopt/optimizer/base.py | 7 +++++-- skopt/optimizer/forest.py | 1 + skopt/optimizer/optimizer.py | 14 ++++++++++---- skopt/tests/test_forest_opt.py | 10 ++++++++-- skopt/tests/test_optimizer.py | 10 +++++++++- skopt/utils.py | 3 +++ 6 files changed, 36 insertions(+), 9 deletions(-) diff --git a/skopt/optimizer/base.py b/skopt/optimizer/base.py index b1821c018..8cde60774 100644 --- a/skopt/optimizer/base.py +++ b/skopt/optimizer/base.py @@ -178,8 +178,10 @@ def base_minimize(func, dimensions, base_estimator, n_jobs : int, default: 1 Number of cores to run in parallel while running the lbfgs - optimizations over the acquisition function. Valid only when - `acq_optimizer` is set to "lbfgs." + optimizations over the acquisition function and given to + the base_estimator. Valid only when + `acq_optimizer` is set to "lbfgs", or when the base_estimator + supports n_jobs as a parameter and was given as a string. Defaults to 1 core. If `n_jobs=-1`, then number of jobs is set to number of cores.
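A minimal sketch of the behaviour documented above; the objective and bounds are placeholders chosen only for illustration:

    from skopt import forest_minimize

    # Because the base estimator is requested by its string name ("ET"),
    # n_jobs is forwarded to the underlying ExtraTreesRegressor as well.
    res = forest_minimize(lambda x: (x[0] - 0.3) ** 2, [(-1.0, 1.0)],
                          base_estimator="ET", n_calls=12, n_jobs=2,
                          random_state=0)
    print(res.x)
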
@@ -254,6 +256,7 @@ def base_minimize(func, dimensions, base_estimator, optimizer = Optimizer(dimensions, base_estimator, n_initial_points=n_initial_points, initial_point_generator=initial_point_generator, + n_jobs=n_jobs, acq_func=acq_func, acq_optimizer=acq_optimizer, random_state=random_state, model_queue_size=model_queue_size, diff --git a/skopt/optimizer/forest.py b/skopt/optimizer/forest.py index f1722ec17..9ceada655 100644 --- a/skopt/optimizer/forest.py +++ b/skopt/optimizer/forest.py @@ -189,6 +189,7 @@ def forest_minimize(func, dimensions, base_estimator="ET", n_calls=100, n_initial_points=n_initial_points, initial_point_generator=initial_point_generator, x0=x0, y0=y0, random_state=random_state, + n_jobs=n_jobs, acq_func=acq_func, xi=xi, kappa=kappa, verbose=verbose, callback=callback, acq_optimizer="sampling", diff --git a/skopt/optimizer/optimizer.py b/skopt/optimizer/optimizer.py index d5cae1dd3..0cb559ff1 100644 --- a/skopt/optimizer/optimizer.py +++ b/skopt/optimizer/optimizer.py @@ -130,6 +130,12 @@ class Optimizer(object): Set random state to something other than None for reproducible results. + n_jobs : int, default: 1 + The number of jobs to run in parallel in the base_estimator, + if the base_estimator supports n_jobs as parameter and + base_estimator was given as string. + If -1, then the number of jobs is set to the number of cores. + acq_func_kwargs : dict Additional arguments to be passed to the acquisition function. @@ -158,7 +164,7 @@ class Optimizer(object): def __init__(self, dimensions, base_estimator="gp", n_random_starts=None, n_initial_points=10, initial_point_generator="random", - acq_func="gp_hedge", + n_jobs=1, acq_func="gp_hedge", acq_optimizer="auto", random_state=None, model_queue_size=None, @@ -210,7 +216,8 @@ def __init__(self, dimensions, base_estimator="gp", if isinstance(base_estimator, str): base_estimator = cook_estimator( base_estimator, space=dimensions, - random_state=self.rng.randint(0, np.iinfo(np.int32).max)) + random_state=self.rng.randint(0, np.iinfo(np.int32).max), + n_jobs=n_jobs) # check if regressor if not is_regressor(base_estimator) and base_estimator is not None: @@ -251,8 +258,7 @@ def __init__(self, dimensions, base_estimator="gp", self.n_points = acq_optimizer_kwargs.get("n_points", 10000) self.n_restarts_optimizer = acq_optimizer_kwargs.get( "n_restarts_optimizer", 5) - n_jobs = acq_optimizer_kwargs.get("n_jobs", 1) - self.n_jobs = n_jobs + self.n_jobs = acq_optimizer_kwargs.get("n_jobs", 1) self.acq_optimizer_kwargs = acq_optimizer_kwargs # Configure search space diff --git a/skopt/tests/test_forest_opt.py b/skopt/tests/test_forest_opt.py index b0aaf2566..5841b0acd 100644 --- a/skopt/tests/test_forest_opt.py +++ b/skopt/tests/test_forest_opt.py @@ -28,11 +28,11 @@ def test_forest_minimize_api(base_estimator): def check_minimize(minimizer, func, y_opt, dimensions, margin, - n_calls, n_initial_points=10, x0=None): + n_calls, n_initial_points=10, x0=None, n_jobs=1): for n in range(3): r = minimizer( func, dimensions, n_calls=n_calls, random_state=n, - n_initial_points=n_initial_points, x0=x0) + n_initial_points=n_initial_points, x0=x0, n_jobs=n_jobs) assert r.fun < y_opt + margin @@ -57,6 +57,12 @@ def test_tree_based_minimize(name, minimizer): [("-2", "-1", "0", "1", "2")], 0.05, 5, 1) +@pytest.mark.slow_test +def test_tree_based_minimize_n_jobs(): + check_minimize(forest_minimize, bench1, 0.05, + [(-2.0, 2.0)], 0.05, 25, 5, n_jobs=2) + + @pytest.mark.fast_test def test_categorical_integer(): def f(params): diff --git 
a/skopt/tests/test_optimizer.py b/skopt/tests/test_optimizer.py index a49ddc7bb..d318b0e82 100644 --- a/skopt/tests/test_optimizer.py +++ b/skopt/tests/test_optimizer.py @@ -304,6 +304,13 @@ def test_optimizer_base_estimator_string_smoke(base_estimator): opt.run(func=lambda x: x[0]**2, n_iter=3) +@pytest.mark.fast_test +def test_optimizer_base_estimator_string_smoke_njobs(): + opt = Optimizer([(-2.0, 2.0)], base_estimator="GBRT", + n_initial_points=1, acq_func="EI", n_jobs=-1) + opt.run(func=lambda x: x[0]**2, n_iter=3) + + def test_defaults_are_equivalent(): # check that the defaults of Optimizer reproduce the defaults of # gp_minimize @@ -384,7 +391,8 @@ def test_categorical_only2(): opt = Optimizer(space, base_estimator=GaussianProcessRegressor(alpha=1e-7), acq_optimizer='lbfgs', - n_initial_points=10) + n_initial_points=10, + n_jobs=2) next_x = opt.ask(n_points=4) assert len(next_x) == 4 diff --git a/skopt/utils.py b/skopt/utils.py index 7d2a5f925..da6d54d0d 100644 --- a/skopt/utils.py +++ b/skopt/utils.py @@ -400,6 +400,9 @@ def cook_estimator(base_estimator, space=None, **kwargs): elif base_estimator == "DUMMY": return None + if ('n_jobs' in kwargs.keys()) and not hasattr(base_estimator, 'n_jobs'): + del kwargs['n_jobs'] + base_estimator.set_params(**kwargs) return base_estimator From 24c965bd2af085445e2d2d1776a5a0d2e3cf3653 Mon Sep 17 00:00:00 2001 From: holgern Date: Wed, 26 Feb 2020 13:09:19 +0100 Subject: [PATCH 159/265] Add entry to whats_new --- doc/whats_new/v0.8.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/doc/whats_new/v0.8.rst b/doc/whats_new/v0.8.rst index 7d791b466..914a9ad01 100644 --- a/doc/whats_new/v0.8.rst +++ b/doc/whats_new/v0.8.rst @@ -8,6 +8,13 @@ Version 0.8.0 ============= **In Development** +:mod:`skopt.Optimizer` +---------------------- +- |Enhancement| n_jobs support was added to Optimizer and + fixed for forest_minimize + :pr:`884` by :user:`Holger Nahrstaedt ` + based on :pr:`627` by :user:`JPN ` + :mod:`skopt.sampler` -------------------- - |MajorFeature| Initial sampling generation From 7cd316b39a879c5ef1fd649bc398808f24b3e1e5 Mon Sep 17 00:00:00 2001 From: holgern Date: Wed, 26 Feb 2020 16:48:36 +0100 Subject: [PATCH 160/265] Put back in plot_objective for better evaluation --- skopt/plots.py | 365 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 365 insertions(+) diff --git a/skopt/plots.py b/skopt/plots.py index ffc2584e3..b060964f6 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -198,6 +198,371 @@ def plot_regret(*args, **kwargs): return ax +def _format_scatter_plot_axes(ax, space, ylabel, dim_labels=None): + # Work out min, max of y axis for the diagonal so we can adjust + # them all to the same value + diagonal_ylim = (np.min([ax[i, i].get_ylim()[0] + for i in range(space.n_dims)]), + np.max([ax[i, i].get_ylim()[1] + for i in range(space.n_dims)])) + + if dim_labels is None: + dim_labels = ["$X_{%i}$" % i if d.name is None else d.name + for i, d in enumerate(space.dimensions)] + # Axes for categorical dimensions are really integers; we have to + # label them with the category names + iscat = [isinstance(dim, Categorical) for dim in space.dimensions] + + # Deal with formatting of the axes + for i in range(space.n_dims): # rows + for j in range(space.n_dims): # columns + ax_ = ax[i, j] + + if j > i: + ax_.axis("off") + elif i > j: # off-diagonal plots + # plots on the diagonal are special, like Texas. They have + # their own range so do not mess with them. 
+ if not iscat[i]: # bounds not meaningful for categoricals + ax_.set_ylim(*space.dimensions[i].bounds) + if iscat[j]: + # partial() avoids creating closures in a loop + ax_.xaxis.set_major_formatter(FuncFormatter( + partial(_cat_format, space.dimensions[j]))) + else: + ax_.set_xlim(*space.dimensions[j].bounds) + if j == 0: # only leftmost column (0) gets y labels + ax_.set_ylabel(dim_labels[i]) + if iscat[i]: # Set category labels for left column + ax_.yaxis.set_major_formatter(FuncFormatter( + partial(_cat_format, space.dimensions[i]))) + else: + ax_.set_yticklabels([]) + + # for all rows except ... + if i < space.n_dims - 1: + ax_.set_xticklabels([]) + # ... the bottom row + else: + [l.set_rotation(45) for l in ax_.get_xticklabels()] + ax_.set_xlabel(dim_labels[j]) + + # configure plot for linear vs log-scale + if space.dimensions[j].prior == 'log-uniform': + ax_.set_xscale('log') + else: + ax_.xaxis.set_major_locator(MaxNLocator(6, prune='both', + integer=iscat[j])) + + if space.dimensions[i].prior == 'log-uniform': + ax_.set_yscale('log') + else: + ax_.yaxis.set_major_locator(MaxNLocator(6, prune='both', + integer=iscat[i])) + + else: # diagonal plots + ax_.set_ylim(*diagonal_ylim) + ax_.yaxis.tick_right() + ax_.yaxis.set_label_position('right') + ax_.yaxis.set_ticks_position('both') + ax_.set_ylabel(ylabel) + + ax_.xaxis.tick_top() + ax_.xaxis.set_label_position('top') + ax_.set_xlabel(dim_labels[j]) + + if space.dimensions[i].prior == 'log-uniform': + ax_.set_xscale('log') + else: + ax_.xaxis.set_major_locator(MaxNLocator(6, prune='both', + integer=iscat[i])) + if iscat[i]: + ax_.xaxis.set_major_formatter(FuncFormatter( + partial(_cat_format, space.dimensions[i]))) + + return ax + + +def partial_dependence(space, model, i, j=None, sample_points=None, + n_samples=250, n_points=40, x_eval=None): + """Calculate the partial dependence for dimensions `i` and `j` with + respect to the objective value, as approximated by `model`. + + The partial dependence plot shows how the value of the dimensions + `i` and `j` influence the `model` predictions after "averaging out" + the influence of all other dimensions. + + When `x_eval` is not `None`, the given values are used instead of + random samples. In this case, `n_samples` will be ignored. + + Parameters + ---------- + space : `Space` + The parameter space over which the minimization was performed. + + model + Surrogate model for the objective function. + + i : int + The first dimension for which to calculate the partial dependence. + + j : int, default=None + The second dimension for which to calculate the partial dependence. + To calculate the 1D partial dependence on `i` alone set `j=None`. + + sample_points : np.array, shape=(n_points, n_dims), default=None + Only used when `x_eval=None`, i.e in case partial dependence should + be calculated. + Randomly sampled and transformed points to use when averaging + the model function at each of the `n_points` when using partial + dependence. + + n_samples : int, default=100 + Number of random samples to use for averaging the model function + at each of the `n_points` when using partial dependence. Only used + when `sample_points=None` and `x_eval=None`. + + n_points : int, default=40 + Number of points at which to evaluate the partial dependence + along each dimension `i` and `j`. + + x_eval : list, default=None + `x_eval` is a list of parameter values or None. In case `x_eval` + is not None, the parsed dependence will be calculated using these + values. 
+ Otherwise, randomly selected samples will be used. + + Returns + ------- + For 1D partial dependence: + + xi : np.array + The points at which the partial dependence was evaluated. + + yi : np.array + The value of the model at each point `xi`. + + For 2D partial dependence: + + xi : np.array, shape=n_points + The points at which the partial dependence was evaluated. + yi : np.array, shape=n_points + The points at which the partial dependence was evaluated. + zi : np.array, shape=(n_points, n_points) + The value of the model at each point `(xi, yi)`. + + For Categorical variables, the `xi` (and `yi` for 2D) returned are + the indices of the variable in `Dimension.categories`. + """ + # The idea is to step through one dimension, evaluating the model with + # that dimension fixed and averaging either over random values or over + # the given ones in x_val in all other dimensions. + # (Or step through 2 dimensions when i and j are given.) + # Categorical dimensions make this interesting, because they are one- + # hot-encoded, so there is a one-to-many mapping of input dimensions + # to transformed (model) dimensions. + + # If we haven't parsed an x_eval list we use random sampled values instead + if x_eval is None and sample_points is None: + sample_points = space.transform(space.rvs(n_samples=n_samples)) + elif sample_points is None: + sample_points = space.transform([x_eval]) + + # dim_locs[i] is the (column index of the) start of dim i in + # sample_points. + # This is useful when we are using one hot encoding, i.e. using + # categorical values + dim_locs = np.cumsum([0] + [d.transformed_size for d in space.dimensions]) + + if j is None: + # We sample evenly instead of randomly. This is necessary when using + # categorical values + xi, xi_transformed = _evenly_sample(space.dimensions[i], n_points) + yi = [] + for x_ in xi_transformed: + rvs_ = np.array(sample_points) # copy + # We replace the values in the dimension that we want to keep + # fixed + rvs_[:, dim_locs[i]:dim_locs[i + 1]] = x_ + # In case of `x_eval=None` rvs consists of random samples. + # Calculating the mean of these samples is how partial dependence + # is implemented. + yi.append(np.mean(model.predict(rvs_))) + + return xi, yi + + else: + xi, xi_transformed = _evenly_sample(space.dimensions[j], n_points) + yi, yi_transformed = _evenly_sample(space.dimensions[i], n_points) + + zi = [] + for x_ in xi_transformed: + row = [] + for y_ in yi_transformed: + rvs_ = np.array(sample_points) # copy + rvs_[:, dim_locs[j]:dim_locs[j + 1]] = x_ + rvs_[:, dim_locs[i]:dim_locs[i + 1]] = y_ + row.append(np.mean(model.predict(rvs_))) + zi.append(row) + + return xi, yi, np.array(zi).T + + +def plot_objective_old(result, levels=10, n_points=40, n_samples=250, size=2, + zscale='linear', dimensions=None, sample_source='random', + minimum='result', n_minimum_search=None): + """Pairwise dependence plot of the objective function. + + The diagonal shows the partial dependence for dimension `i` with + respect to the objective function. The off-diagonal shows the + partial dependence for dimensions `i` and `j` with + respect to the objective function. The objective function is + approximated by `result.model.` + + Pairwise scatter plots of the points at which the objective + function was directly evaluated are shown on the off-diagonal. + A red point indicates by default the best observed minimum, but + this can be changed by changing argument `minimum`.
+ + Parameters + ---------- + result : `OptimizeResult` + The result for which to create the scatter plot matrix. + + levels : int, default=10 + Number of levels to draw on the contour plot, passed directly + to `plt.contour()`. + + n_points : int, default=40 + Number of points at which to evaluate the partial dependence + along each dimension. + + n_samples : int, default=250 + Number of samples to use for averaging the model function + at each of the `n_points` when `sample_method` is set to 'random'. + + size : float, default=2 + Height (in inches) of each facet. + + zscale : str, default='linear' + Scale to use for the z axis of the contour plots. Either 'linear' + or 'log'. + + dimensions : list of str, default=None + Labels of the dimension + variables. `None` defaults to `space.dimensions[i].name`, or + if also `None` to `['X_0', 'X_1', ..]`. + + sample_source : str or list of floats, default='random' + Defines to samples generation to use for averaging the model function + at each of the `n_points`. + + A partial dependence plot is only generated, when `sample_source` + is set to 'random' and `n_samples` is sufficient. + + `sample_source` can also be a list of + floats, which is then used for averaging. + + Valid strings: + + - 'random' - `n_samples` random samples will used + + - 'result' - Use only the best observed parameters + + - 'expected_minimum' - Parameters that gives the best + minimum Calculated using scipy's minimize method. + This method currently does not work with categorical values. + + - 'expected_minimum_random' - Parameters that gives the + best minimum when using naive random sampling. + Works with categorical values. + + minimum : str or list of floats, default = 'result' + Defines the values for the red points in the plots. + Valid strings: + + - 'result' - Use best observed parameters + + - 'expected_minimum' - Parameters that gives the best + minimum Calculated using scipy's minimize method. + This method currently does not work with categorical values. + + - 'expected_minimum_random' - Parameters that gives the + best minimum when using naive random sampling. + Works with categorical values + + n_minimum_search : int, default = None + Determines how many points should be evaluated + to find the minimum when using 'expected_minimum' or + 'expected_minimum_random'. Parameter is used when + `sample_source` and/or `minimum` is set to + 'expected_minimum' or 'expected_minimum_random'. + + Returns + ------- + ax : `Axes` + The matplotlib axes. + """ + # Here we define the values for which to plot the red dot (2d plot) and + # the red dotted line (1d plot). + # These same values will be used for evaluating the plots when + # calculating dependence. (Unless partial + # dependence is to be used instead). + space = result.space + if space.n_dims == 1: + raise ValueError("plot_objective needs at least two" + "variables. Found only one.") + x_vals = _evaluate_min_params(result, minimum, n_minimum_search) + if sample_source == "random": + x_eval = None + else: + x_eval = _evaluate_min_params(result, sample_source, + n_minimum_search) + rvs_transformed = space.transform(space.rvs(n_samples=n_samples)) + samples, minimum, _ = _map_categories(space, result.x_iters, x_vals) + + if zscale == 'log': + locator = LogLocator() + elif zscale == 'linear': + locator = None + else: + raise ValueError("Valid values for zscale are 'linear' and 'log'," + " not '%s'." 
% zscale) + + fig, ax = plt.subplots(space.n_dims, space.n_dims, + figsize=(size * space.n_dims, size * space.n_dims)) + + fig.subplots_adjust(left=0.05, right=0.95, bottom=0.05, top=0.95, + hspace=0.1, wspace=0.1) + + for i in range(space.n_dims): + for j in range(space.n_dims): + if i == j: + xi, yi = partial_dependence(space, result.models[-1], i, + j=None, + sample_points=rvs_transformed, + n_points=n_points, x_eval=x_eval) + + ax[i, i].plot(xi, yi) + ax[i, i].axvline(minimum[i], linestyle="--", color="r", lw=1) + + # lower triangle + elif i > j: + xi, yi, zi = partial_dependence(space, result.models[-1], + i, j, + rvs_transformed, n_points, + x_eval=x_eval) + ax[i, j].contourf(xi, yi, zi, levels, + locator=locator, cmap='viridis_r') + ax[i, j].scatter(samples[:, j], samples[:, i], + c='k', s=10, lw=0.) + ax[i, j].scatter(minimum[j], minimum[i], + c=['r'], s=20, lw=0.) + ylabel = "Partial dependence" + return _format_scatter_plot_axes(ax, space, ylabel=ylabel, + dim_labels=dimensions) + + def _get_ylim_diagonal(ax): """Get the min / max of the ylim for all diagonal plots. This is used in _adjust_fig() so the ylim is the same From dba85f60a70012e16dd951016ad504fb2eb5ea9e Mon Sep 17 00:00:00 2001 From: holgern Date: Wed, 26 Feb 2020 16:50:13 +0100 Subject: [PATCH 161/265] Put back in current code for better merging --- skopt/plots.py | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/skopt/plots.py b/skopt/plots.py index b060964f6..d252ab7fb 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -563,6 +563,73 @@ def plot_objective_old(result, levels=10, n_points=40, n_samples=250, size=2, dim_labels=dimensions) +def plot_evaluations_old(result, bins=20, dimensions=None): + """Visualize the order in which points where sampled. + + The scatter plot matrix shows at which points in the search + space and in which order samples were evaluated. Pairwise + scatter plots are shown on the off-diagonal for each + dimension of the search space. The order in which samples + were evaluated is encoded in each point's color. + The diagonal shows a histogram of sampled values for each + dimension. A red point indicates the found minimum. + + Parameters + ---------- + result : `OptimizeResult` + The result for which to create the scatter plot matrix. + + bins : int, bins=20 + Number of bins to use for histograms on the diagonal. + + dimensions : list of str, default=None + Labels of the dimension + variables. `None` defaults to `space.dimensions[i].name`, or + if also `None` to `['X_0', 'X_1', ..]`. + + Returns + ------- + ax : `Axes` + The matplotlib axes. + """ + space = result.space + # Convert categoricals to integers, so we can ensure consistent ordering. + # Assign indices to categories in the order they appear in the Dimension. + # Matplotlib's categorical plotting functions are only present in v 2.1+, + # and may order categoricals differently in different plots anyway. 
+ samples, minimum, iscat = _map_categories(space, result.x_iters, result.x) + order = range(samples.shape[0]) + fig, ax = plt.subplots(space.n_dims, space.n_dims, + figsize=(2 * space.n_dims, 2 * space.n_dims)) + + fig.subplots_adjust(left=0.05, right=0.95, bottom=0.05, top=0.95, + hspace=0.1, wspace=0.1) + + for i in range(space.n_dims): + for j in range(space.n_dims): + if i == j: + if iscat[j]: + bins_ = len(space.dimensions[j].categories) + elif space.dimensions[j].prior == 'log-uniform': + low, high = space.bounds[j] + bins_ = np.logspace(np.log10(low), np.log10(high), bins) + else: + bins_ = bins + ax[i, i].hist( + samples[:, j], bins=bins_, + range=None if iscat[j] else space.dimensions[j].bounds) + + # lower triangle + elif i > j: + ax[i, j].scatter(samples[:, j], samples[:, i], + c=order, s=40, lw=0., cmap='viridis') + ax[i, j].scatter(minimum[j], minimum[i], + c=['r'], s=20, lw=0.) + + return _format_scatter_plot_axes(ax, space, ylabel="Number of samples", + dim_labels=dimensions) + + def _get_ylim_diagonal(ax): """Get the min / max of the ylim for all diagonal plots. This is used in _adjust_fig() so the ylim is the same From 73e6beabf90d072a3a8ef7c3f4aa798e3fc585eb Mon Sep 17 00:00:00 2001 From: holgern Date: Wed, 26 Feb 2020 17:50:04 +0100 Subject: [PATCH 162/265] Fix pep8 and next_x estimation in plot --- examples/bayesian-optimization.py | 9 ++-- examples/exploration-vs-exploitation.py | 8 ++-- ...optimizer-with-different-base-estimator.py | 16 +++---- skopt/plots.py | 43 +++++++++---------- 4 files changed, 36 insertions(+), 40 deletions(-) diff --git a/examples/bayesian-optimization.py b/examples/bayesian-optimization.py index c04ada91c..af779e094 100644 --- a/examples/bayesian-optimization.py +++ b/examples/bayesian-optimization.py @@ -40,7 +40,7 @@ 2. optimize a cheap acquisition/utility function $u$ based on the posterior distribution for sampling the next point. - .. math::`x_{t+1} = arg \min_x u(x)` + .. math::`x_{t+1} = arg \\min_x u(x)` Exploit uncertainty to balance exploration against exploitation. 3. Sample the next observation :math:`y_{t+1}` at :math:`x_{t+1}`. @@ -53,7 +53,7 @@ tried next: - Expected improvement (default): - :math:`-EI(x) = -\mathbb{E} [f(x) - f(x_t^+)]` + :math:`-EI(x) = -\\mathbb{E} [f(x) - f(x_t^+)]` - Lower confidence bound: :math:`LCB(x) = \mu_{GP}(x) + \kappa \sigma_{GP}(x)` - Probability of improvement: :math:`-PI(x) = -P(f(x) \geq f(x_t^+) + \kappa)` @@ -69,6 +69,7 @@ import numpy as np np.random.seed(237) import matplotlib.pyplot as plt +from skopt.plots import plot_gaussian_process ############################################################################# # Toy example @@ -148,12 +149,14 @@ def f(x, noise_level=noise_level): # 2. The acquisition values that determine the next point to be queried. plt.rcParams["figure.figsize"] = (8, 14) + + def f_wo_noise(x): return f(x, noise_level=0) ############################################################################# # Plot the 5 iterations following the 5 random points -from skopt.plots import plot_gaussian_process + for n_iter in range(5): # Plot true function. 
plt.subplot(5, 2, 2*n_iter+1) diff --git a/examples/exploration-vs-exploitation.py b/examples/exploration-vs-exploitation.py index 78fc740e1..bdda52c07 100644 --- a/examples/exploration-vs-exploitation.py +++ b/examples/exploration-vs-exploitation.py @@ -35,7 +35,9 @@ import numpy as np np.random.seed(1234) import matplotlib.pyplot as plt - +from skopt.learning import ExtraTreesRegressor +from skopt import Optimizer +from skopt.plots import plot_gaussian_process ############################################################################# # Toy example @@ -45,10 +47,6 @@ # All points after the first one is therefore choosen by the acquisition # function. -from skopt.learning import ExtraTreesRegressor -from skopt import Optimizer -from skopt.plots import plot_gaussian_process - noise_level = 0.1 # Our 1D toy problem, this is the function we are trying to diff --git a/examples/optimizer-with-different-base-estimator.py b/examples/optimizer-with-different-base-estimator.py index e51d21794..f8682243c 100644 --- a/examples/optimizer-with-different-base-estimator.py +++ b/examples/optimizer-with-different-base-estimator.py @@ -20,7 +20,8 @@ import numpy as np np.random.seed(1234) import matplotlib.pyplot as plt - +from skopt.plots import plot_gaussian_process +from skopt import Optimizer ############################################################################# # Toy example @@ -41,22 +42,19 @@ def objective_wo_noise(x): ############################################################################# -from skopt import Optimizer opt_gp = Optimizer([(-2.0, 2.0)], base_estimator="GP", n_initial_points=5, acq_optimizer="sampling", random_state=42) ############################################################################# -from skopt.plots import plot_gaussian_process - -def plot_optimizer(res, next_x, n_iter, max_iters=5): +def plot_optimizer(res, n_iter, max_iters=5): if n_iter == 0: show_legend = True else: show_legend = False ax = plt.subplot(max_iters, 2, 2 * n_iter + 1) # Plot GP(x) + contours - ax = plot_gaussian_process(res, ax=ax, + ax = plot_gaussian_process(res, ax=ax, objective=objective_wo_noise, noise_level=noise_level, show_legend=show_legend, show_title=True, @@ -71,7 +69,7 @@ def plot_optimizer(res, next_x, n_iter, max_iters=5): noise_level=noise_level, show_legend=show_legend, show_title=False, show_next_point=True, show_acq_func=True, - next_x=next_x, show_observations=False, + show_observations=False, show_mu=False) ax.set_ylabel("") ax.set_xlabel("") @@ -90,7 +88,7 @@ def plot_optimizer(res, next_x, n_iter, max_iters=5): f_val = objective(next_x) res = opt_gp.tell(next_x, f_val) if i >= 5: - plot_optimizer(res, opt_gp._next_x, n_iter=i-5, max_iters=5) + plot_optimizer(res, n_iter=i-5, max_iters=5) plt.tight_layout(rect=[0, 0.03, 1, 0.95]) plt.plot() @@ -132,6 +130,6 @@ def plot_optimizer(res, next_x, n_iter, max_iters=5): f_val = objective(next_x) res = opt.tell(next_x, f_val) if i >= 5: - plot_optimizer(res, opt._next_x, n_iter=i - 5, max_iters=5) + plot_optimizer(res, n_iter=i - 5, max_iters=5) plt.tight_layout(rect=[0, 0.03, 1, 0.95]) plt.show() diff --git a/skopt/plots.py b/skopt/plots.py index 4876ed05b..29c924f17 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -139,9 +139,6 @@ def plot_gaussian_process(res, **kwargs): show_next_point : boolean, default=False When True, the next evaluated point is plotted - next_x : float, default=None - The next evaluated Point can also be defined. 
- show_observations : boolean, default=True When True, observations are plotted as dots. @@ -161,7 +158,6 @@ def plot_gaussian_process(res, **kwargs): show_title = kwargs.get("show_title", True) show_acq_func = kwargs.get("show_acq_func", False) show_next_point = kwargs.get("show_next_point", False) - next_x = kwargs.get("next_x", None) show_observations = kwargs.get("show_observations", True) show_mu = kwargs.get("show_mu", True) acq_func = kwargs.get("acq_func", None) @@ -203,14 +199,17 @@ def plot_gaussian_process(res, **kwargs): # Plot true function. if objective is not None: ax.plot(x, fx, "r--", label="True (unknown)") - ax.fill(np.concatenate([x, x[::-1]]), - np.concatenate(([fx_i - 1.9600 * noise_level for fx_i in fx], - [fx_i + 1.9600 * noise_level for fx_i in fx[::-1]])), - alpha=.2, fc="r", ec="None") + ax.fill(np.concatenate( + [x, x[::-1]]), + np.concatenate(([fx_i - 1.9600 * noise_level + for fx_i in fx], + [fx_i + 1.9600 * noise_level + for fx_i in fx[::-1]])), + alpha=.2, fc="r", ec="None") # Plot GP(x) + contours - y_pred, sigma = gp.predict(x_gp, return_std=True) if show_mu: + y_pred, sigma = gp.predict(x_gp, return_std=True) ax.plot(x, y_pred, "g--", label=r"$\mu_{GP}(x)$") ax.fill(np.concatenate([x, x[::-1]]), np.concatenate([y_pred - 1.9600 * sigma, @@ -221,7 +220,8 @@ def plot_gaussian_process(res, **kwargs): if show_observations: ax.plot(curr_x_iters, curr_func_vals, "r.", markersize=8, label="Observations") - if (show_mu or show_observations or objective is not None) and show_acq_func: + if (show_mu or show_observations or objective is not None)\ + and show_acq_func: ax_ei = ax.twinx() ax_ei.set_ylabel(str(acq_func) + "(x)") plot_both = True @@ -230,23 +230,20 @@ def plot_gaussian_process(res, **kwargs): plot_both = False if show_acq_func: acq = _gaussian_acquisition(x_gp, gp, y_opt=np.min(curr_func_vals), - acq_func=acq_func, acq_func_kwargs=acq_func_kwargs) + acq_func=acq_func, + acq_func_kwargs=acq_func_kwargs) + next_x = x[np.argmin(acq)] + next_acq = acq[np.argmin(acq)] if acq_func in ["EI", "PI", "EIps", "PIps"]: acq = - acq + next_acq = -next_acq ax_ei.plot(x, acq, "b", label=str(acq_func) + "(x)") if not plot_both: ax_ei.fill_between(x.ravel(), 0, acq.ravel(), alpha=0.3, color='blue') - if next_x is None and n_calls >= 0: - next_x = res.x_iters[n_random + n_calls] - if next_x is not None: - next_acq = _gaussian_acquisition(res.space.transform([next_x]), gp, - y_opt=np.min(curr_func_vals), - acq_func=acq_func, - acq_func_kwargs=acq_func_kwargs) - if acq_func in ["EI", "PI", "EIps", "PIps"]: - next_acq = -next_acq + if show_next_point and next_x is not None: - ax_ei.plot(next_x, next_acq, "bo", markersize=6, label="Next query point") + ax_ei.plot(next_x, next_acq, "bo", markersize=6, + label="Next query point") if show_title: ax.set_title(r"x* = %.4f, f(x*) = %.4f" % (res.x[0], res.fun)) @@ -258,14 +255,14 @@ def plot_gaussian_process(res, **kwargs): if plot_both: lines, labels = ax.get_legend_handles_labels() lines2, labels2 = ax_ei.get_legend_handles_labels() - ax_ei.legend(lines + lines2, labels + labels2, loc="best", prop={'size': 6}, numpoints=1) + ax_ei.legend(lines + lines2, labels + labels2, loc="best", + prop={'size': 6}, numpoints=1) else: ax.legend(loc="best", prop={'size': 6}, numpoints=1) return ax - def plot_regret(*args, **kwargs): """Plot one or several cumulative regret traces. 
From 16ecaa799ce3ba057a393663af5a84e6894b7f56 Mon Sep 17 00:00:00 2001 From: holgern Date: Wed, 26 Feb 2020 21:41:10 +0100 Subject: [PATCH 163/265] Adapt ask-and-tell example * Improve plot function --- examples/ask-and-tell.py | 65 ++++++++----------------- examples/bayesian-optimization.py | 2 +- examples/exploration-vs-exploitation.py | 3 +- skopt/plots.py | 57 +++++++++++----------- 4 files changed, 49 insertions(+), 78 deletions(-) diff --git a/examples/ask-and-tell.py b/examples/ask-and-tell.py index 21433b7a0..ff7b643ba 100644 --- a/examples/ask-and-tell.py +++ b/examples/ask-and-tell.py @@ -28,8 +28,8 @@ import numpy as np np.random.seed(1234) - import matplotlib.pyplot as plt +from skopt.plots import plot_gaussian_process ############################################################################# # The Setup @@ -51,6 +51,9 @@ def objective(x, noise_level=noise_level): return np.sin(5 * x[0]) * (1 - np.tanh(x[0] ** 2))\ + np.random.randn() * noise_level +def objective_wo_noise(x, noise_level=0): + return objective(x, noise_level=0) + ######################################################################### # Here a quick plot to visualize what the function looks like: @@ -72,7 +75,9 @@ def objective(x, noise_level=noise_level): # naming of the ***_minimize()** functions. An important difference is that # you do not pass the objective function to the optimizer. -opt = Optimizer([(-2.0, 2.0)], "ET", acq_optimizer="sampling") +opt = Optimizer([(-2.0, 2.0)], "GP", acq_func="EI", + acq_optimizer="sampling", + initial_point_generator="lhs") # To obtain a suggestion for the point at which to evaluate the objective # you call the ask() method of opt: @@ -97,51 +102,16 @@ def objective(x, noise_level=noise_level): for i in range(9): next_x = opt.ask() f_val = objective(next_x) - opt.tell(next_x, f_val) + res = opt.tell(next_x, f_val) ######################################################################### # We can now plot the random suggestions and the first model that has been # fit: - -from skopt.acquisition import gaussian_ei - - -def plot_optimizer(opt, x, fx): - model = opt.models[-1] - x_model = opt.space.transform(x.tolist()) - - # Plot true function. 
- plt.plot(x, fx, "r--", label="True (unknown)") - plt.fill(np.concatenate([x, x[::-1]]), - np.concatenate([fx - 1.9600 * noise_level, - fx[::-1] + 1.9600 * noise_level]), - alpha=.2, fc="r", ec="None") - - # Plot Model(x) + contours - y_pred, sigma = model.predict(x_model, return_std=True) - plt.plot(x, y_pred, "g--", label=r"$\mu(x)$") - plt.fill(np.concatenate([x, x[::-1]]), - np.concatenate([y_pred - 1.9600 * sigma, - (y_pred + 1.9600 * sigma)[::-1]]), - alpha=.2, fc="g", ec="None") - - # Plot sampled points - plt.plot(opt.Xi, opt.yi, - "r.", markersize=8, label="Observations") - - acq = gaussian_ei(x_model, model, y_opt=np.min(opt.yi)) - # shift down to make a better plot - acq = 4 * acq - 2 - plt.plot(x, acq, "b", label="EI(x)") - plt.fill_between(x.ravel(), -2.0, acq.ravel(), alpha=0.3, color='blue') - - # Adjust plot layout - plt.grid() - plt.legend(loc='best') - - -plot_optimizer(opt, x, fx) - +_ = plot_gaussian_process(res, objective=objective_wo_noise, + noise_level=noise_level, + show_next_point=False, + show_acq_func=True) +plt.show() ######################################################################### # Let us sample a few more points and plot the optimizer again: @@ -149,10 +119,13 @@ def plot_optimizer(opt, x, fx): for i in range(10): next_x = opt.ask() f_val = objective(next_x) - opt.tell(next_x, f_val) - -plot_optimizer(opt, x, fx) + res = opt.tell(next_x, f_val) +_ = plot_gaussian_process(res, objective=objective_wo_noise, + noise_level=noise_level, + show_next_point=True, + show_acq_func=True) +plt.show() ######################################################################### # By using the :class:`Optimizer` class directly you get control over the # optimization loop. diff --git a/examples/bayesian-optimization.py b/examples/bayesian-optimization.py index af779e094..412c89e15 100644 --- a/examples/bayesian-optimization.py +++ b/examples/bayesian-optimization.py @@ -27,7 +27,7 @@ is certainly a better optimization algorithm than Bayesian optimization. This example uses :class:`plots.plot_gaussian_process` which is available -since version 0.7.3. +since version 0.8. Bayesian optimization loop -------------------------- diff --git a/examples/exploration-vs-exploitation.py b/examples/exploration-vs-exploitation.py index bdda52c07..fd2db6c94 100644 --- a/examples/exploration-vs-exploitation.py +++ b/examples/exploration-vs-exploitation.py @@ -28,7 +28,7 @@ value is updated with the new acquisition parameters. This example uses :class:`plots.plot_gaussian_process` which is available -since version 0.7.3. +since version 0.8. """ print(__doc__) @@ -178,7 +178,6 @@ def objective_wo_noise(x): ############################################################################# acq_func_kwargs = {"kappa": 100000} ############################################################################# - opt.acq_func_kwargs = acq_func_kwargs opt.update_next() ############################################################################# diff --git a/skopt/plots.py b/skopt/plots.py index 29c924f17..aea6114b6 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -117,32 +117,35 @@ def plot_gaussian_process(res, **kwargs): The matplotlib axes on which to draw the plot, or `None` to create a new one. - n_calls : int, default=-1 + n_calls : int, default: -1 Can be used to evaluate the model at call `n_calls`. - objective : func, default=None + objective : func, default: None Defines the true objective function. Must have one input parameter. 
- noise_level : float, default=0 + n_points : int, default: 1000 + Number of data points used to create the plots + + noise_level : float, default: 0 Sets the estimated noise level - show_legend : boolean, default=True + show_legend : boolean, default: True When True, a legend is plotted. - show_title : boolean, default=True + show_title : boolean, default: True When True, a title containing the found minimum value is shown - show_acq_func : boolean, default=False + show_acq_func : boolean, default: False When True, the acquisition function is plotted - show_next_point : boolean, default=False + show_next_point : boolean, default: False When True, the next evaluated point is plotted - show_observations : boolean, default=True + show_observations : boolean, default: True When True, observations are plotted as dots. - show_mu : boolean, default=True + show_mu : boolean, default: True When True, the predicted model is shown. Returns @@ -160,22 +163,18 @@ def plot_gaussian_process(res, **kwargs): show_next_point = kwargs.get("show_next_point", False) show_observations = kwargs.get("show_observations", True) show_mu = kwargs.get("show_mu", True) - acq_func = kwargs.get("acq_func", None) - n_random = kwargs.get("n_random", None) - acq_func_kwargs = kwargs.get("acq_func_kwargs", None) + n_points = kwargs.get("n_points", 1000) if ax is None: ax = plt.gca() - bounds = res.space.dimensions[0].bounds - x = np.linspace(bounds[0], bounds[1], 400).reshape(-1, 1) - x_gp = res.space.transform(x.tolist()) + assert res.space.n_dims == 1, "Space dimension must be 1" + x, x_model = _evenly_sample(res.space.dimensions[0], n_points) + x = x.reshape(-1, 1) + x_model = x_model.reshape(-1, 1) if res.specs is not None and "args" in res.specs: - if n_random is None: - n_random = res.specs["args"].get('n_random_starts', n_random) - if acq_func is None: - acq_func = res.specs["args"].get("acq_func", "EI") - if acq_func_kwargs is None: - acq_func_kwargs = res.specs["args"].get("acq_func_kwargs", {}) + n_random = res.specs["args"].get('n_random_starts', None) + acq_func = res.specs["args"].get("acq_func", "EI") + acq_func_kwargs = res.specs["args"].get("acq_func_kwargs", {}) if acq_func_kwargs is None: acq_func_kwargs = {} @@ -187,11 +186,11 @@ def plot_gaussian_process(res, **kwargs): if objective is not None: fx = np.array([objective(x_i) for x_i in x]) if n_calls < 0: - gp = res.models[-1] + model = res.models[-1] curr_x_iters = res.x_iters curr_func_vals = res.func_vals else: - gp = res.models[n_calls] + model = res.models[n_calls] curr_x_iters = res.x_iters[:n_random + n_calls] curr_func_vals = res.func_vals[:n_random + n_calls] @@ -209,7 +208,7 @@ def plot_gaussian_process(res, **kwargs): # Plot GP(x) + contours if show_mu: - y_pred, sigma = gp.predict(x_gp, return_std=True) + y_pred, sigma = model.predict(x_model, return_std=True) ax.plot(x, y_pred, "g--", label=r"$\mu_{GP}(x)$") ax.fill(np.concatenate([x, x[::-1]]), np.concatenate([y_pred - 1.9600 * sigma, @@ -229,17 +228,17 @@ def plot_gaussian_process(res, **kwargs): ax_ei = ax plot_both = False if show_acq_func: - acq = _gaussian_acquisition(x_gp, gp, y_opt=np.min(curr_func_vals), + acq = _gaussian_acquisition(x_model, model, y_opt=np.min(curr_func_vals), acq_func=acq_func, acq_func_kwargs=acq_func_kwargs) next_x = x[np.argmin(acq)] next_acq = acq[np.argmin(acq)] - if acq_func in ["EI", "PI", "EIps", "PIps"]: - acq = - acq - next_acq = -next_acq + acq = - acq + next_acq = -next_acq ax_ei.plot(x, acq, "b", label=str(acq_func) + "(x)") if not plot_both: - 
ax_ei.fill_between(x.ravel(), 0, acq.ravel(), alpha=0.3, color='blue') + ax_ei.fill_between(x.ravel(), 0, acq.ravel(), + alpha=0.3, color='blue') if show_next_point and next_x is not None: ax_ei.plot(next_x, next_acq, "bo", markersize=6, From ff57137fdfd5921b39383e46507afa2858e9e66c Mon Sep 17 00:00:00 2001 From: holgern Date: Wed, 26 Feb 2020 21:43:56 +0100 Subject: [PATCH 164/265] Fix pep8 --- examples/exploration-vs-exploitation.py | 2 ++ examples/optimizer-with-different-base-estimator.py | 4 ++++ skopt/plots.py | 3 ++- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/examples/exploration-vs-exploitation.py b/examples/exploration-vs-exploitation.py index fd2db6c94..a55b30c0e 100644 --- a/examples/exploration-vs-exploitation.py +++ b/examples/exploration-vs-exploitation.py @@ -54,6 +54,8 @@ def objective(x, noise_level=noise_level): return np.sin(5 * x[0]) * (1 - np.tanh(x[0] ** 2)) +\ np.random.randn() * noise_level + + def objective_wo_noise(x): return objective(x, noise_level=0) diff --git a/examples/optimizer-with-different-base-estimator.py b/examples/optimizer-with-different-base-estimator.py index f8682243c..cf1d84a96 100644 --- a/examples/optimizer-with-different-base-estimator.py +++ b/examples/optimizer-with-different-base-estimator.py @@ -34,9 +34,12 @@ # Our 1D toy problem, this is the function we are trying to # minimize + def objective(x, noise_level=noise_level): return np.sin(5 * x[0]) * (1 - np.tanh(x[0] ** 2))\ + np.random.randn() * noise_level + + def objective_wo_noise(x): return objective(x, noise_level=0) @@ -47,6 +50,7 @@ def objective_wo_noise(x): ############################################################################# + def plot_optimizer(res, n_iter, max_iters=5): if n_iter == 0: show_legend = True diff --git a/skopt/plots.py b/skopt/plots.py index aea6114b6..7b615c7ad 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -228,7 +228,8 @@ def plot_gaussian_process(res, **kwargs): ax_ei = ax plot_both = False if show_acq_func: - acq = _gaussian_acquisition(x_model, model, y_opt=np.min(curr_func_vals), + acq = _gaussian_acquisition(x_model, model, + y_opt=np.min(curr_func_vals), acq_func=acq_func, acq_func_kwargs=acq_func_kwargs) next_x = x[np.argmin(acq)] From 7b7e19c3a92318fae0fa9d8a133e0831af7eada1 Mon Sep 17 00:00:00 2001 From: holgern Date: Wed, 26 Feb 2020 21:58:42 +0100 Subject: [PATCH 165/265] Add unit test and fix some typos --- examples/exploration-vs-exploitation.py | 6 +++--- examples/optimizer-with-different-base-estimator.py | 2 +- skopt/tests/test_plots.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/exploration-vs-exploitation.py b/examples/exploration-vs-exploitation.py index a55b30c0e..bbabd0884 100644 --- a/examples/exploration-vs-exploitation.py +++ b/examples/exploration-vs-exploitation.py @@ -17,10 +17,10 @@ "EI" or "PI". By default the acqusition function is set to "gp_hedge" which chooses the best of these three. Therefore I recommend not using gp_hedge when tweaking exploration/exploitation, but instead choosing "LCB", -"EI" or "PI. +"EI" or "PI". The way to pass kappa and xi to the optimizer is to use the named argument -"acq_func_kwargs". This is a dict of extra arguments for the aqcuisittion +"acq_func_kwargs". This is a dict of extra arguments for the aqcuisition function. If you want opt.ask() to give a new acquisition value immediately after @@ -44,7 +44,7 @@ # ----------- # First we define our objective like in the ask-and-tell example notebook and # define a plotting function. 
We do however only use one initial random point. -# All points after the first one is therefore choosen by the acquisition +# All points after the first one is therefore chosen by the acquisition # function. noise_level = 0.1 diff --git a/examples/optimizer-with-different-base-estimator.py b/examples/optimizer-with-different-base-estimator.py index cf1d84a96..064f13dba 100644 --- a/examples/optimizer-with-different-base-estimator.py +++ b/examples/optimizer-with-different-base-estimator.py @@ -13,7 +13,7 @@ we can create a regressor object and set it as kernel. This example uses :class:`plots.plot_gaussian_process` which is available -since version 0.7.3. +since version 0.8. """ print(__doc__) diff --git a/skopt/tests/test_plots.py b/skopt/tests/test_plots.py index 361e10935..2f7ec4857 100644 --- a/skopt/tests/test_plots.py +++ b/skopt/tests/test_plots.py @@ -111,7 +111,7 @@ def test_evaluate_min_params(): x_min2, f_min2 = expected_minimum_random_sampling(res, n_random_starts=1000, random_state=1) - + plots.plot_gaussian_process(res) assert _evaluate_min_params(res, params='result') == res.x assert _evaluate_min_params(res, params=[1.]) == [1.] assert _evaluate_min_params(res, params='expected_minimum', From 4e772a3c6728623ecf97447bbda681b61ce6d0a7 Mon Sep 17 00:00:00 2001 From: holgern Date: Thu, 27 Feb 2020 10:48:49 +0100 Subject: [PATCH 166/265] Start to integrate changes * partial_dependence_1D and partial_dependence_2D are now used in partial_dependence * new plot functions are renamed to _new --- skopt/plots.py | 207 +++++++++++++------------------------- skopt/tests/test_plots.py | 48 ++++++++- 2 files changed, 119 insertions(+), 136 deletions(-) diff --git a/skopt/plots.py b/skopt/plots.py index d252ab7fb..81d65623e 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -354,61 +354,19 @@ def partial_dependence(space, model, i, j=None, sample_points=None, For Categorical variables, the `xi` (and `yi` for 2D) returned are the indices of the variable in `Dimension.categories`. """ - # The idea is to step through one dimension, evaluating the model with - # that dimension fixed and averaging either over random values or over - # the given ones in x_val in all other dimensions. - # (Or step through 2 dimensions when i and j are given.) - # Categorical dimensions make this interesting, because they are one- - # hot-encoded, so there is a one-to-many mapping of input dimensions - # to transformed (model) dimensions. - # If we haven't parsed an x_eval list we use random sampled values instead + # If we haven't parsed an x_eval list we use random sampled values instead if x_eval is None and sample_points is None: sample_points = space.transform(space.rvs(n_samples=n_samples)) elif sample_points is None: sample_points = space.transform([x_eval]) - # dim_locs[i] is the (column index of the) start of dim i in - # sample_points. - # This is useful when we are using one hot encoding, i.e. using - # categorical values - dim_locs = np.cumsum([0] + [d.transformed_size for d in space.dimensions]) - if j is None: - # We sample evenly instead of randomly. This is necessary when using - # categorical values - xi, xi_transformed = _evenly_sample(space.dimensions[i], n_points) - yi = [] - for x_ in xi_transformed: - rvs_ = np.array(sample_points) # copy - # We replace the values in the dimension that we want to keep - # fixed - rvs_[:, dim_locs[i]:dim_locs[i + 1]] = x_ - # In case of `x_eval=None` rvs consists of random samples. - # Calculating the mean of these samples is how partial dependence - # is implemented.
- yi.append(np.mean(model.predict(rvs_))) - - return xi, yi - + return partial_dependence_1D(space, model, i, sample_points, n_points) else: - xi, xi_transformed = _evenly_sample(space.dimensions[j], n_points) - yi, yi_transformed = _evenly_sample(space.dimensions[i], n_points) + return partial_dependence_2D(space, model, i, j, sample_points, n_points) - zi = [] - for x_ in xi_transformed: - row = [] - for y_ in yi_transformed: - rvs_ = np.array(sample_points) # copy - rvs_[:, dim_locs[j]:dim_locs[j + 1]] = x_ - rvs_[:, dim_locs[i]:dim_locs[i + 1]] = y_ - row.append(np.mean(model.predict(rvs_))) - zi.append(row) - return xi, yi, np.array(zi).T - - -def plot_objective_old(result, levels=10, n_points=40, n_samples=250, size=2, +def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, zscale='linear', dimensions=None, sample_source='random', minimum='result', n_minimum_search=None): """Pairwise dependence plot of the objective function. @@ -563,7 +521,7 @@ def plot_objective_old(result, levels=10, n_points=40, n_samples=250, size=2, dim_labels=dimensions) -def plot_evaluations_old(result, bins=20, dimensions=None): +def plot_evaluations(result, bins=20, dimensions=None): """Visualize the order in which points where sampled. The scatter plot matrix shows at which points in the search @@ -834,7 +792,8 @@ def _map_bins(bins, bounds, prior, categories=None): return bins_mapped -def partial_dependence_1D(model, dimension, samples, n_points=40): +def partial_dependence_1D(space, model, i, samples, + n_points=40): """ Calculate the partial dependence for a single dimension. @@ -847,11 +806,14 @@ def partial_dependence_1D(model, dimension, samples, n_points=40): Parameters ---------- + space : `Space` + The parameter space over which the minimization was performed. + model Surrogate model for the objective function. - dimension : Dimension - The `Dimension`-object for which to calculate the partial dependence. + i : int + The dimension for which to calculate the partial dependence. samples : np.array, shape=(n_points, n_dims) Randomly sampled and transformed points to use when averaging @@ -859,14 +821,8 @@ def partial_dependence_1D(model, dimension, samples, n_points=40): dependence. n_points : int, default=40 - Number of points along each dimension where the partial dependence - is evaluated. - - x_eval : list, default=None - `x_eval` is a list of parameter values or None. In case `x_eval` - is not None, the parsed dependence will be calculated using these - values. - Otherwise, random selected samples will be used. + Number of points at which to evaluate the partial dependence + along each dimension `i`. Returns ------- @@ -876,6 +832,19 @@ def partial_dependence_1D(model, dimension, samples, n_points=40): yi : np.array The average value of the modelled objective function at each point `xi`. """ + # The idea is to step through one dimension, evaluating the model with + # that dimension fixed and averaging either over random values or over + # the given ones in x_val in all other dimensions. + # (Or step through 2 dimensions when i and j are given.) + # Categorical dimensions make this interesting, because they are one- + # hot-encoded, so there is a one-to-many mapping of input dimensions + # to transformed (model) dimensions. + + # dim_locs[i] is the (column index of the) start of dim i in + # sample_points. 
+    # This is useful when we are using one hot encoding, i.e. using
+    # categorical values
+    dim_locs = np.cumsum([0] + [d.transformed_size for d in space.dimensions])
 
     def _calc(x):
         """
@@ -884,40 +853,23 @@ def _calc(x):
         the index'th dimension of the search-space to the value x,
         and then averaging over all samples.
         """
-
-        # Copy the samples so we don't destroy the originals.
-        samples_copy = np.copy(samples)
-
-        # Set the index'th dimension to x for all samples.
-        samples_copy[:, index] = x
-
-        # Calculate the predicted objective value for all samples.
-        y_pred = model.predict(samples_copy)
-
-        # The average predicted value for the objective function.
-        y_pred_mean = np.mean(y_pred)
-
-        return y_pred_mean
-
-    # Get search-space index for the given dimension.
-    index = dimension.index
-
-    # Get the bounds of the dimension.
-    bounds = dimension.bounds
-
-    # Generate evenly spaced points between the bounds.
-    xi = np.linspace(bounds[0], bounds[1], n_points)
-
-    # Transform the points if necessary.
-    xi_transformed = dimension.transform(xi)
-
+        rvs_ = np.array(samples)  # copy
+        # We replace the values in the dimension that we want to keep
+        # fixed
+        rvs_[:, dim_locs[i]:dim_locs[i + 1]] = x
+        # In case of `x_eval=None` rvs consists of random samples.
+        # Calculating the mean of these samples is how partial dependence
+        # is implemented.
+        return np.mean(model.predict(rvs_))
+    xi, xi_transformed = _evenly_sample(space.dimensions[i], n_points)
     # Calculate the partial dependence for all the points.
    yi = [_calc(x) for x in xi_transformed]
 
     return xi, yi
 
 
-def partial_dependence_2D(model, dimension1, dimension2, samples, n_points=40):
+def partial_dependence_2D(space, model, i, j, samples,
+                          n_points=40):
     """
     Calculate the partial dependence for two dimensions in the search-space.
 
@@ -930,24 +882,26 @@ def partial_dependence_2D(model, dimension1, dimension2, samples, n_points=40):
 
     Parameters
     ----------
+    space : `Space`
+        The parameter space over which the minimization was performed.
+
     model
         Surrogate model for the objective function.
 
-    dimension1 : Dimension
-        The first `Dimension`-object for which to calculate the
-        partial dependence.
+    i : int
+        The first dimension for which to calculate the partial dependence.
 
-    dimension2 : Dimension
-        The second `Dimension`-object for which to calculate the
-        partial dependence.
+    j : int
+        The second dimension for which to calculate the partial dependence.
 
     samples : np.array, shape=(n_points, n_dims)
         Randomly sampled and transformed points to use when averaging
-        the model function at each of the `n_points`.
+        the model function at each of the `n_points` when using partial
+        dependence.
 
     n_points : int, default=40
-        Number of points along each dimension where the partial dependence
-        is evaluated.
+        Number of points at which to evaluate the partial dependence
+        along dimensions `i` and `j`.
 
     Returns
     -------
@@ -960,7 +914,19 @@ def partial_dependence_2D(model, dimension1, dimension2, samples, n_points=40):
     zi : np.array, shape=(n_points, n_points)
         The average value of the objective function at each point `(xi, yi)`.
     """
+    # The idea is to step through one dimension, evaluating the model with
+    # that dimension fixed and averaging either over random values or over
+    # the given ones in x_val in all other dimensions.
+    # (Or step through 2 dimensions when i and j are given.)
+    # Categorical dimensions make this interesting, because they are one-
+    # hot-encoded, so there is a one-to-many mapping of input dimensions
+    # to transformed (model) dimensions.
+    # dim_locs[i] is the (column index of the) start of dim i in
+    # sample_points.
+    # This is useful when we are using one hot encoding, i.e. using
+    # categorical values
+    dim_locs = np.cumsum([0] + [d.transformed_size for d in space.dimensions])
 
     def _calc(x, y):
         """
         Helper-function to calculate the average predicted
@@ -969,40 +935,13 @@ def _calc(x, y):
         and setting the index2'th dimension to the value y,
         and then averaging over all samples.
         """
+        rvs_ = np.array(samples)  # copy
+        rvs_[:, dim_locs[j]:dim_locs[j + 1]] = x
+        rvs_[:, dim_locs[i]:dim_locs[i + 1]] = y
+        return np.mean(model.predict(rvs_))
 
-        # Copy the samples so we don't destroy the originals.
-        samples_copy = np.copy(samples)
-
-        # Set the index1'th dimension to x for all samples.
-        samples_copy[:, index1] = x
-
-        # Set the index2'th dimension to y for all samples.
-        samples_copy[:, index2] = y
-
-        # Calculate the predicted objective value for all samples.
-        z_pred = model.predict(samples_copy)
-
-        # The average predicted value for the objective function.
-        z_pred_mean = np.mean(z_pred)
-
-        return z_pred_mean
-
-    # Get search-space indices for the dimensions.
-    index1 = dimension1.index
-    index2 = dimension2.index
-
-    # Get search-space bounds for the dimensions.
-    bounds1 = dimension1.bounds
-    bounds2 = dimension2.bounds
-
-    # Generate evenly spaced points between the dimension bounds.
-    xi = np.linspace(bounds1[0], bounds1[1], n_points)
-    yi = np.linspace(bounds2[0], bounds2[1], n_points)
-
-    # Transform the points if necessary.
-    xi_transformed = dimension1.transform(xi)
-    yi_transformed = dimension2.transform(yi)
-
+    xi, xi_transformed = _evenly_sample(space.dimensions[j], n_points)
+    yi, yi_transformed = _evenly_sample(space.dimensions[i], n_points)
     # Calculate the partial dependence for all combinations of these points.
     zi = [[_calc(x, y) for x in xi_transformed] for y in yi_transformed]
 
@@ -1064,16 +1003,14 @@ def plot_objective_2D(result, dimension_name1, dimension_name2,
 
     # Get the search-space instance from the optimization results.
     space = result.space
-
+    if x_eval is None:
+        samples = space.transform(space.rvs(n_samples=n_samples))
+    else:
+        samples = space.transform([x_eval])
     # Get the dimension-object, its index in the search-space, and its name.
     dimension1 = space[dimension_name1]
     dimension2 = space[dimension_name2]
 
-    # Ensure dimensions are not Categorical.
-    # TODO replace with check_list_types(dimensions, (Integer, Real)) in PR #597
-    if any(isinstance(dim, Categorical) for dim in [dimension1, dimension2]):
-        raise ValueError("Categorical dimension is not supported.")
-
     # Get the indices for the search-space dimensions.
     index1 = dimension1.index
     index2 = dimension2.index
@@ -1228,7 +1165,7 @@ def plot_histogram(result, dimension_name, bins=20, rotate_labels=0):
     return fig, ax
 
 
-def plot_objective(result, levels=10, n_points=40, n_samples=250,
+def plot_objective_new(result, levels=10, n_points=40, n_samples=250,
                    zscale='linear', dimension_names=None):
     """
     Plot a 2-d matrix with so-called Partial Dependence plots
@@ -1463,7 +1400,7 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250,
     return fig, ax
 
 
-def plot_evaluations(result, bins=20, dimension_names=None):
+def plot_evaluations_new(result, bins=20, dimension_names=None):
     """
     Visualize the order in which points were sampled during optimization.
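
For reference, the refactored helpers above can also be called directly. A
minimal sketch under assumed inputs (the toy objective, bounds, and sample
counts below are illustrative choices and not part of this patch):

    from skopt import gp_minimize
    from skopt.plots import partial_dependence_1D, partial_dependence_2D

    # Toy 2-d problem; res.space and res.models[-1] supply the Space and
    # the last fitted surrogate model that the helpers expect.
    res = gp_minimize(lambda x: x[0] ** 2 + x[1] ** 2,
                      [(-5.0, 5.0), (-5.0, 5.0)],
                      n_calls=15, random_state=3)

    # Transformed random samples to average over, prepared the same way
    # partial_dependence() prepares them when x_eval is None.
    samples = res.space.transform(res.space.rvs(n_samples=50, random_state=3))

    # Partial dependence of the surrogate on dimension 0 alone, and on
    # the pair of dimensions (0, 1).
    xi, yi = partial_dependence_1D(res.space, res.models[-1], 0,
                                   samples, n_points=20)
    xi, yi, zi = partial_dependence_2D(res.space, res.models[-1], 0, 1,
                                       samples, n_points=20)

The unit test added in the next diff exercises the same calls against fixed
reference values.
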
diff --git a/skopt/tests/test_plots.py b/skopt/tests/test_plots.py index 361e10935..f77674ce4 100644 --- a/skopt/tests/test_plots.py +++ b/skopt/tests/test_plots.py @@ -5,12 +5,14 @@ from sklearn.tree import DecisionTreeClassifier from sklearn.model_selection import cross_val_score from numpy.testing import assert_raises +from numpy.testing import assert_array_almost_equal from skopt.space import Integer, Categorical from skopt import plots, gp_minimize import matplotlib.pyplot as plt from skopt.benchmarks import bench3 from skopt import expected_minimum, expected_minimum_random_sampling -from skopt.plots import _evaluate_min_params +from skopt.plots import _evaluate_min_params, partial_dependence +from skopt.plots import partial_dependence_1D, partial_dependence_2D from skopt import Optimizer @@ -42,6 +44,50 @@ def objective(params): res = gp_minimize(objective, SPACE, n_calls=10, random_state=3) + samples = res.space.transform(res.space.rvs(n_samples=40, random_state=3)) + xi_ = [1., 10.5, 20.] + yi_ = [-0.9194979634067544, -0.9194802312964899, -0.9194601855985786] + xi, yi = partial_dependence(res.space, res.models[-1], 0, + sample_points=samples, n_points=3) + assert_array_almost_equal(xi, xi_) + assert_array_almost_equal(yi, yi_) + + xi, yi = partial_dependence_1D(res.space, res.models[-1], 0, + samples, n_points=3) + assert_array_almost_equal(xi, xi_) + assert_array_almost_equal(yi, yi_) + + xi_ = [0, 1] + yi_ = [-0.919544265279874, -0.919428904254748] + xi, yi = partial_dependence(res.space, res.models[-1], 4, + sample_points=samples, n_points=3) + assert_array_almost_equal(xi, xi_) + assert_array_almost_equal(yi, yi_) + + xi, yi = partial_dependence_1D(res.space, res.models[-1], 4, + samples, n_points=3) + assert_array_almost_equal(xi, xi_) + assert_array_almost_equal(yi, yi_) + + xi_ = [0, 1] + yi_ = [1., 10.5, 20.] 
+    zi_ = [[-0.91956104, -0.91944569],
+           [-0.91954331, -0.91942795],
+           [-0.91952327, -0.91940791]]
+    xi, yi, zi = partial_dependence(res.space, res.models[-1], 0, 4,
+                                    sample_points=samples, n_points=3)
+    assert_array_almost_equal(xi, xi_)
+    assert_array_almost_equal(yi, yi_)
+    assert_array_almost_equal(zi, zi_)
+    xi, yi, zi = partial_dependence_2D(res.space, res.models[-1], 0, 4,
+                                       samples, n_points=3)
+    assert_array_almost_equal(xi, xi_)
+    assert_array_almost_equal(yi, yi_)
+    assert_array_almost_equal(zi, zi_)
+
+    x_min, f_min = expected_minimum_random_sampling(res, random_state=1)
+    x_min2, f_min2 = expected_minimum(res, random_state=1)
+
     x_min, f_min = expected_minimum_random_sampling(res, random_state=1)
     x_min2, f_min2 = expected_minimum(res, random_state=1)
 

From 65afce601389f3069ccd90dddd0e58f664e3c6fc Mon Sep 17 00:00:00 2001
From: holgern
Date: Thu, 27 Feb 2020 14:57:09 +0100
Subject: [PATCH 167/265] Several improvements in space and plots

* get_samples_dimension is removed
* __getitem__ for space improved, it now also returns the index
* plot_objective_2D fixed
* plot_histogram fixed

---
 skopt/plots.py            | 184 +++++++++++++-----------------------
 skopt/space/space.py      |  16 ++--
 skopt/tests/test_space.py |  10 +++
 skopt/utils.py            |  37 --------
 4 files changed, 91 insertions(+), 156 deletions(-)

diff --git a/skopt/plots.py b/skopt/plots.py
index 81d65623e..c1bba247e 100644
--- a/skopt/plots.py
+++ b/skopt/plots.py
@@ -20,7 +20,6 @@
 from matplotlib.ticker import MaxNLocator, FuncFormatter  # noqa: E402
 
 from skopt.space import Categorical
-from skopt.utils import get_samples_dimension
 from collections import Counter
 
 
@@ -367,8 +366,8 @@ def partial_dependence(space, model, i, j=None, sample_points=None,
 
 
 def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2,
-                        zscale='linear', dimensions=None, sample_source='random',
-                        minimum='result', n_minimum_search=None):
+                   zscale='linear', dimensions=None, sample_source='random',
+                   minimum='result', n_minimum_search=None):
     """Pairwise dependence plot of the objective function.
The diagonal shows the partial dependence for dimension `i` with @@ -473,11 +472,12 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, x_vals = _evaluate_min_params(result, minimum, n_minimum_search) if sample_source == "random": x_eval = None + samples = space.transform(space.rvs(n_samples=n_samples)) else: x_eval = _evaluate_min_params(result, sample_source, n_minimum_search) - rvs_transformed = space.transform(space.rvs(n_samples=n_samples)) - samples, minimum, _ = _map_categories(space, result.x_iters, x_vals) + samples = space.transform([x_eval]) + x_samples, minimum, _ = _map_categories(space, result.x_iters, x_vals) if zscale == 'log': locator = LogLocator() @@ -496,23 +496,21 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, for i in range(space.n_dims): for j in range(space.n_dims): if i == j: - xi, yi = partial_dependence(space, result.models[-1], i, - j=None, - sample_points=rvs_transformed, - n_points=n_points, x_eval=x_eval) + xi, yi = partial_dependence_1D(space, result.models[-1], i, + samples=samples, + n_points=n_points) ax[i, i].plot(xi, yi) ax[i, i].axvline(minimum[i], linestyle="--", color="r", lw=1) # lower triangle elif i > j: - xi, yi, zi = partial_dependence(space, result.models[-1], - i, j, - rvs_transformed, n_points, - x_eval=x_eval) + xi, yi, zi = partial_dependence_2D(space, result.models[-1], + i, j, + samples, n_points) ax[i, j].contourf(xi, yi, zi, levels, locator=locator, cmap='viridis_r') - ax[i, j].scatter(samples[:, j], samples[:, i], + ax[i, j].scatter(x_samples[:, j], x_samples[:, i], c='k', s=10, lw=0.) ax[i, j].scatter(minimum[j], minimum[i], c=['r'], s=20, lw=0.) @@ -673,7 +671,7 @@ def _adjust_fig(fig, ax, space, ylabel, dimensions): # Process the plots on the diagonal. for row in range(n_dims): # Get the search-space dimension for this row. - dim = dimensions[row] + index, dim = dimensions[row] # Reference to the diagonal plot for this row. a = ax[row, row] @@ -695,12 +693,12 @@ def _adjust_fig(fig, ax, space, ylabel, dimensions): # Process the plots below the diagonal. for row in range(n_dims): # Get the search-space dimension for this row. - dim_row = dimensions[row] + index_row, dim_row = dimensions[row] # Only iterate until the diagonal. for col in range(row): # Get the search-space dimension for this column. - dim_col = dimensions[col] + index_col, dim_col = dimensions[col] # Reference to the plot for this row and column. a = ax[row, col] @@ -732,12 +730,12 @@ def _adjust_fig(fig, ax, space, ylabel, dimensions): # Set the dimension-names for the left-most column. col = 0 for row in range(1, n_dims): - ax[row, col].set_ylabel(dimensions[row].name) + ax[row, col].set_ylabel(dimensions[row][1].name) # Set the dimension-names for the bottom row. row = n_dims - 1 for col in range(0, n_dims): - ax[row, col].set_xlabel(dimensions[col].name) + ax[row, col].set_xlabel(dimensions[col][1].name) # Remove the y-tick labels for all plots except the left-most column. 
for row in range(n_dims): @@ -952,7 +950,9 @@ def _calc(x, y): def plot_objective_2D(result, dimension_name1, dimension_name2, - n_points=40, n_samples=250, levels=10, zscale='linear'): + n_points=40, n_samples=250, levels=10, zscale='linear', + sample_source='random', + minimum='result', n_minimum_search=None, ax=None): """ Create and return a Matplotlib figure and axes with a landscape contour-plot of the last fitted model of the search-space, @@ -1003,43 +1003,39 @@ def plot_objective_2D(result, dimension_name1, dimension_name2, # Get the search-space instance from the optimization results. space = result.space - if x_eval is None: + x_vals = _evaluate_min_params(result, minimum, n_minimum_search) + if sample_source == "random": + x_eval = None samples = space.transform(space.rvs(n_samples=n_samples)) else: + x_eval = _evaluate_min_params(result, sample_source, + n_minimum_search) samples = space.transform([x_eval]) + x_samples, x_minimum, _ = _map_categories(space, result.x_iters, x_vals) # Get the dimension-object, its index in the search-space, and its name. - dimension1 = space[dimension_name1] - dimension2 = space[dimension_name2] - - # Get the indices for the search-space dimensions. - index1 = dimension1.index - index2 = dimension2.index + index1, dimension1 = space[dimension_name1] + index2, dimension2 = space[dimension_name2] # Get the samples from the optimization-log for the relevant dimensions. - samples1 = get_samples_dimension(result=result, index=index1) - samples2 = get_samples_dimension(result=result, index=index2) + # samples1 = get_samples_dimension(result=result, index=index1) + samples1 = x_samples[:, index1] + samples2 = x_samples[:, index2] + # samples2 = get_samples_dimension(result=result, index=index2) # Get the best-found samples for the relevant dimensions. - best_sample1 = result.x[index1] - best_sample2 = result.x[index2] + best_sample1 = x_minimum[index1] + best_sample2 = x_minimum[index2] # Get the last fitted model for the search-space. last_model = result.models[-1] - # Get new random samples from the search-space and transform if necessary. - new_samples = space.rvs(n_samples=n_samples) - new_samples = space.transform(new_samples) - # Estimate the objective function for these sampled points # using the last fitted model for the search-space. - xi, yi, zi = partial_dependence_2D(model=last_model, - dimension1=dimension1, - dimension2=dimension2, - samples=new_samples, - n_points=n_points) + xi, yi, zi = partial_dependence_2D(space, last_model, index1, index2, + samples, n_points=n_points) - # Start a new plot. - fig, ax = plt.subplots(nrows=1, ncols=1) + if ax is None: + ax = plt.gca() # Scale for the z-axis of the contour-plot. Either Log or Linear (None). locator = LogLocator() if zscale == 'log' else None @@ -1049,16 +1045,16 @@ def plot_objective_2D(result, dimension_name1, dimension_name2, # Plot all the parameters that were sampled during optimization. # These are plotted as small black dots. - ax.scatter(samples1, samples2, c='black', s=10, linewidths=1) + ax.scatter(samples2, samples1, c='black', s=10, linewidths=1) # Plot the best parameters that were sampled during optimization. # These are plotted as a big red star. - ax.scatter(best_sample1, best_sample2, + ax.scatter(best_sample2, best_sample1, c='red', s=50, linewidths=1, marker='*') # Use the dimension-names as the labels for the plot-axes. 
- ax.set_xlabel(dimension_name1) - ax.set_ylabel(dimension_name2) + ax.set_xlabel(dimension2.name) + ax.set_ylabel(dimension1.name) # Use log-scale on the x-axis? if dimension1.prior == 'log-uniform': @@ -1068,10 +1064,10 @@ def plot_objective_2D(result, dimension_name1, dimension_name2, if dimension2.prior == 'log-uniform': ax.set_yscale('log') - return fig, ax + return ax -def plot_histogram(result, dimension_name, bins=20, rotate_labels=0): +def plot_histogram(result, dimension_name, bins=20, rotate_labels=0, ax=None): """ Create and return a Matplotlib figure with a histogram of the samples from the optimization results, @@ -1094,11 +1090,6 @@ def plot_histogram(result, dimension_name, bins=20, rotate_labels=0): Returns ------- - fig : `Matplotlib.Figure` - The Matplotlib Figure-object. - For example, you can save the plot by calling - `fig.savefig('file.png')` - ax : `Matplotlib.Axes` The Matplotlib Axes-object. """ @@ -1107,13 +1098,13 @@ def plot_histogram(result, dimension_name, bins=20, rotate_labels=0): space = result.space # Get the dimension-object. - dimension = space[dimension_name] + index, dimension = space[dimension_name] # Get the samples from the optimization-log for that particular dimension. - samples = get_samples_dimension(result=result, index=dimension.index) + samples = [x[index] for x in result.x_iters] - # Start a new plot. - fig, ax = plt.subplots(nrows=1, ncols=1) + if ax is None: + ax = plt.gca() if isinstance(dimension, Categorical): # When the search-space dimension is Categorical, it means @@ -1159,14 +1150,14 @@ def plot_histogram(result, dimension_name, bins=20, rotate_labels=0): ax.set_xscale('log') # Set the labels. - ax.set_xlabel(dimension_name) + ax.set_xlabel(dimension.name) ax.set_ylabel('Sample Count') - return fig, ax + return ax def plot_objective_new(result, levels=10, n_points=40, n_samples=250, - zscale='linear', dimension_names=None): + zscale='linear', dimension_names=None): """ Plot a 2-d matrix with so-called Partial Dependence plots of the objective function. This shows the influence of each @@ -1297,15 +1288,12 @@ def plot_objective_new(result, levels=10, n_points=40, n_samples=250, # Get the relevant search-space dimensions. if dimension_names is None: # Get all dimensions. - dimensions = space.dimensions + dimensions = [] + for row in range(space.n_dims): + dimensions.append((row, space.dimensions[row])) else: - # Only get the named dimensions. dimensions = space[dimension_names] - # Ensure there are no categorical dimensions. - # TODO replace with check_list_types(dimensions, (Integer, Real)) in PR #597 - if any(isinstance(dim, Categorical) for dim in dimensions): - raise ValueError("Categorical dimension is not supported.") # Number of search-space dimensions we are using. n_dims = len(dimensions) @@ -1323,24 +1311,17 @@ def plot_objective_new(result, levels=10, n_points=40, n_samples=250, # For all rows in the 2-d plot matrix. for row in range(n_dims): # Get the search-space dimension for this row. - dim_row = dimensions[row] - - # Get the index for the search-space dimension. - # This is used to lookup that particular dimension in some functions. - index_row = dim_row.index + index_row, dim_row = dimensions[row] # Get the samples from the optimization-log for this dimension. - samples_row = get_samples_dimension(result=result, index=index_row) + samples_row = [x[index_row] for x in result.x_iters] # Get the best-found sample for this dimension. best_sample_row = result.x[index_row] - # Search-space boundary for this dimension. 
- bounds_row = dim_row.bounds - # Calculate partial dependence for this dimension. - xi, yi = partial_dependence_1D(model=last_model, - dimension=dim_row, + xi, yi = partial_dependence_1D(space, last_model, + index_row, samples=new_samples, n_points=n_points) @@ -1358,23 +1339,18 @@ def plot_objective_new(result, levels=10, n_points=40, n_samples=250, # For all columns until the diagonal in the 2-d plot matrix. for col in range(row): # Get the search-space dimension for this column. - dim_col = dimensions[col] - - # Get the index for this search-space dimension. - # This is used to lookup that dimension in some functions. - index_col = dim_col.index + index_col, dim_col = dimensions[col] # Get the samples from the optimization-log for that dimension. - samples_col = get_samples_dimension(result=result, index=index_col) + samples_col = [x[index_col] for x in result.x_iters] # Get the best-found sample for this dimension. best_sample_col = result.x[index_col] # Calculate the partial dependence for these two dimensions. # Note that column and row are switched here. - xi, yi, zi = partial_dependence_2D(model=last_model, - dimension1=dim_col, - dimension2=dim_row, + xi, yi, zi = partial_dependence_2D(space, last_model, + index_col, index_row, samples=new_samples, n_points=n_points) @@ -1394,8 +1370,8 @@ def plot_objective_new(result, levels=10, n_points=40, n_samples=250, c='red', s=100, lw=0., marker='*') # Make various adjustments to the plots. - _adjust_fig(fig=fig, ax=ax, space=space, - dimensions=dimensions, ylabel="Partial Dependence") + #_adjust_fig(fig=fig, ax=ax, space=space, + # dimensions=dimensions, ylabel="Partial Dependence") return fig, ax @@ -1447,22 +1423,11 @@ def plot_evaluations_new(result, bins=20, dimension_names=None): # Get the relevant search-space dimensions. if dimension_names is None: # Get all dimensions. - dimensions = space.dimensions - dim_index = [] - for row in range(space.n_dims): - dim_index.append(row) - else: - # Only get the named dimensions. dimensions = [] - dim_index = [] for row in range(space.n_dims): - dim_name = space.dimensions[row].name - if dim_name is None: - dim_name = "X_%d" % row - if dim_name in dimension_names: - dimensions.append(space.dimensions[row]) - dim_index.append(row) - + dimensions.append((row, space.dimensions[row])) + else: + dimensions = space[dimension_names] # Number of search-space dimensions we are using. n_dims = len(dimensions) @@ -1477,14 +1442,10 @@ def plot_evaluations_new(result, bins=20, dimension_names=None): # For all rows in the 2-d plot matrix. for row in range(n_dims): # Get the search-space dimension for this row. - dim_row = dimensions[row] - - # Get the index for the search-space dimension. - # This is used to lookup that particular dimension in some functions. - index_row = dim_index[row] + index_row, dim_row = dimensions[row] # Get the samples from the optimization-log for this dimension. - samples_row = get_samples_dimension(result=result, index=index_row) + samples_row = [x[index_row] for x in result.x_iters] # Get the best-found sample for this dimension. best_sample_row = result.x[index_row] @@ -1509,13 +1470,10 @@ def plot_evaluations_new(result, bins=20, dimension_names=None): # For all columns until the diagonal in the 2-d plot matrix. for col in range(row): # Get the search-space dimension for this column. - dim_col = dimensions[col] + index_col, dim_col = dimensions[col] - # Get the index for this search-space dimension. - # This is used to lookup that dimension in some functions. 
- index_col = dim_index[col] # Get the samples from the optimization-log for that dimension. - samples_col = get_samples_dimension(result=result, index=index_col) + samples_col = [x[index_col] for x in result.x_iters] # Plot all the parameters that were sampled during optimization. # These are plotted as small coloured dots, where the colour-shade diff --git a/skopt/space/space.py b/skopt/space/space.py index 32434dd27..bb4316fc2 100644 --- a/skopt/space/space.py +++ b/skopt/space/space.py @@ -919,24 +919,28 @@ def __getitem__(self, dimension_names): Returns ------- - dims Dimension, list(Dimension), None + dims tuple (index, Dimension), list(tuple(index, Dimension)), \ + (None, None) A single search-space dimension with the given name, or a list of search-space dimensions with the given names. """ def _get(dimension_name): """Helper-function for getting a single dimension.""" - + index = 0 # Get the index of the search-space dimension using its name. for dim in self.dimensions: if dimension_name == dim.name: - return dim - return None + return (index, dim) + elif dimension_name == index: + return (index, dim) + index += 1 + return (None, None) - if isinstance(dimension_names, str): + if isinstance(dimension_names, (str, int)): # Get a single search-space dimension. dims = _get(dimension_name=dimension_names) - elif isinstance(dimension_names, list): + elif isinstance(dimension_names, (list, tuple)): # Get a list of search-space dimensions. # Note that we do not check whether the names are really strings. dims = [_get(dimension_name=name) for name in dimension_names] diff --git a/skopt/tests/test_space.py b/skopt/tests/test_space.py index 939321778..24979bba4 100644 --- a/skopt/tests/test_space.py +++ b/skopt/tests/test_space.py @@ -600,6 +600,16 @@ def test_dimension_name(): with pytest.raises(ValueError) as exc: real = Real(1, 2, name=n) assert("Dimension's name must be either string or None." == exc.value.args[0]) + s = Space([Real(1, 2, name="a"), + Integer(1, 100, name="b"), + Categorical(["red, blue"], name="c")]) + assert s["a"] == (0, s.dimensions[0]) + assert s["a", "c"] == [(0, s.dimensions[0]), (2, s.dimensions[2])] + assert s[["a", "c"]] == [(0, s.dimensions[0]), (2, s.dimensions[2])] + assert s[("a", "c")] == [(0, s.dimensions[0]), (2, s.dimensions[2])] + assert s[0] == (0, s.dimensions[0]) + assert s[0, "c"] == [(0, s.dimensions[0]), (2, s.dimensions[2])] + assert s[0, 2] == [(0, s.dimensions[0]), (2, s.dimensions[2])] @pytest.mark.parametrize("dimension", diff --git a/skopt/utils.py b/skopt/utils.py index 54a49abd4..83d5e27ab 100644 --- a/skopt/utils.py +++ b/skopt/utils.py @@ -582,43 +582,6 @@ def normalize_dimensions(dimensions): return Space(transformed_dimensions) -def get_samples_dimension(result, index): - """Get the samples for the given dimension index - from the optimization-result from e.g. `gp_minimize()`. - - This function is used instead of numpy, because if - we convert `result.x_iters` to a 2-d numpy array, - then all data-types must be identical otherwise numpy - will promote all the types to the most general type. - For example, if you have a Categorical dimension which - is a string, then your Real and Integer dimensions will - be converted to strings as well in the 2-d numpy array. - - Using this function instead of numpy ensures the - original data-type is being preserved. - - See `plots.py` for example usage. - - Parameters - ---------- - result : OptimizeResult - The optimization results e.g. from calling `gp_minimize()`. 
- - index : int - Index for a dimension in the search-space. - - Returns - ------- - samples : list of either int, float or string - The optimization samples for the given dimension. - """ - - # Get the samples from the optimization-log for the relevant dimension. - samples = [x[index] for x in result.x_iters] - - return samples - - def check_list_types(x, types): """ Check whether all elements of a list `x` are of the correct type(s) From 9b0ce891f2fe94156d3ee6ec61f14aa27a1a2514 Mon Sep 17 00:00:00 2001 From: holgern Date: Thu, 27 Feb 2020 20:22:13 +0100 Subject: [PATCH 168/265] plot_objective and plot_evaluation have been merged --- doc/modules/classes.rst | 4 + ...artial-dependence-plot-with-categorical.py | 1 + examples/plots/partial-dependence-plot.py | 1 - skopt/plots.py | 606 ++++++------------ skopt/tests/test_plots.py | 2 + 5 files changed, 187 insertions(+), 427 deletions(-) diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index a411f2703..dd48514ae 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -188,9 +188,13 @@ details. :template: function.rst plots.partial_dependence + plots.partial_dependence_1D + plots.partial_dependence_2D plots.plot_convergence plots.plot_evaluations plots.plot_objective + plots.plot_objective_2D + plots.plot_histogram plots.plot_regret .. _utils_ref: diff --git a/examples/plots/partial-dependence-plot-with-categorical.py b/examples/plots/partial-dependence-plot-with-categorical.py index 733334823..72ebebce7 100644 --- a/examples/plots/partial-dependence-plot-with-categorical.py +++ b/examples/plots/partial-dependence-plot-with-categorical.py @@ -40,6 +40,7 @@ def objective(params): ############################################################################# # Bayesian optimization # ===================== + SPACE = [ Integer(1, 20, name='max_depth'), Integer(2, 100, name='min_samples_split'), diff --git a/examples/plots/partial-dependence-plot.py b/examples/plots/partial-dependence-plot.py index f85e8e152..58926a80c 100644 --- a/examples/plots/partial-dependence-plot.py +++ b/examples/plots/partial-dependence-plot.py @@ -19,7 +19,6 @@ np.random.seed(123) import matplotlib.pyplot as plt - ############################################################################# # Objective function # ================== diff --git a/skopt/plots.py b/skopt/plots.py index c1bba247e..57b126497 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -197,49 +197,52 @@ def plot_regret(*args, **kwargs): return ax -def _format_scatter_plot_axes(ax, space, ylabel, dim_labels=None): +def _format_scatter_plot_axes(ax, space, ylabel, dimensions, + dim_labels=None): # Work out min, max of y axis for the diagonal so we can adjust # them all to the same value - diagonal_ylim = (np.min([ax[i, i].get_ylim()[0] - for i in range(space.n_dims)]), - np.max([ax[i, i].get_ylim()[1] - for i in range(space.n_dims)])) + diagonal_ylim = _get_ylim_diagonal(ax) + diagonal_ylim = tuple(diagonal_ylim) + + # Number of search-space dimensions we are using. 
+ n_dims = len(dimensions) if dim_labels is None: dim_labels = ["$X_{%i}$" % i if d.name is None else d.name - for i, d in enumerate(space.dimensions)] + for i, d in dimensions] # Axes for categorical dimensions are really integers; we have to # label them with the category names - iscat = [isinstance(dim, Categorical) for dim in space.dimensions] + iscat = [isinstance(dim[1], Categorical) for dim in dimensions] # Deal with formatting of the axes - for i in range(space.n_dims): # rows - for j in range(space.n_dims): # columns + for i in range(n_dims): # rows + for j in range(n_dims): # columns ax_ = ax[i, j] - + index_i, dim_i = dimensions[i] + index_j, dim_j = dimensions[j] if j > i: ax_.axis("off") elif i > j: # off-diagonal plots # plots on the diagonal are special, like Texas. They have # their own range so do not mess with them. if not iscat[i]: # bounds not meaningful for categoricals - ax_.set_ylim(*space.dimensions[i].bounds) + ax_.set_ylim(*dim_i.bounds) if iscat[j]: # partial() avoids creating closures in a loop ax_.xaxis.set_major_formatter(FuncFormatter( - partial(_cat_format, space.dimensions[j]))) + partial(_cat_format, dim_j))) else: - ax_.set_xlim(*space.dimensions[j].bounds) + ax_.set_xlim(*dim_j.bounds) if j == 0: # only leftmost column (0) gets y labels ax_.set_ylabel(dim_labels[i]) if iscat[i]: # Set category labels for left column ax_.yaxis.set_major_formatter(FuncFormatter( - partial(_cat_format, space.dimensions[i]))) + partial(_cat_format, dim_i))) else: ax_.set_yticklabels([]) # for all rows except ... - if i < space.n_dims - 1: + if i < n_dims - 1: ax_.set_xticklabels([]) # ... the bottom row else: @@ -247,13 +250,13 @@ def _format_scatter_plot_axes(ax, space, ylabel, dim_labels=None): ax_.set_xlabel(dim_labels[j]) # configure plot for linear vs log-scale - if space.dimensions[j].prior == 'log-uniform': + if dim_j.prior == 'log-uniform': ax_.set_xscale('log') else: ax_.xaxis.set_major_locator(MaxNLocator(6, prune='both', integer=iscat[j])) - if space.dimensions[i].prior == 'log-uniform': + if dim_i.prior == 'log-uniform': ax_.set_yscale('log') else: ax_.yaxis.set_major_locator(MaxNLocator(6, prune='both', @@ -270,14 +273,14 @@ def _format_scatter_plot_axes(ax, space, ylabel, dim_labels=None): ax_.xaxis.set_label_position('top') ax_.set_xlabel(dim_labels[j]) - if space.dimensions[i].prior == 'log-uniform': + if dim_i.prior == 'log-uniform': ax_.set_xscale('log') else: ax_.xaxis.set_major_locator(MaxNLocator(6, prune='both', integer=iscat[i])) if iscat[i]: ax_.xaxis.set_major_formatter(FuncFormatter( - partial(_cat_format, space.dimensions[i]))) + partial(_cat_format, dim_i))) return ax @@ -360,31 +363,61 @@ def partial_dependence(space, model, i, j=None, sample_points=None, sample_points = space.transform([x_eval]) if j is None: - return partial_dependence_1D(space, model, i, sample_points, n_points) + return partial_dependence_1D(space, model, i, + sample_points, n_points) else: - return partial_dependence_2D(space, model, i, j, sample_points, n_points) + return partial_dependence_2D(space, model, i, j, + sample_points, n_points) def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, zscale='linear', dimensions=None, sample_source='random', - minimum='result', n_minimum_search=None): - """Pairwise dependence plot of the objective function. - - The diagonal shows the partial dependence for dimension `i` with - respect to the objective function. 
The off-diagonal shows the - partial dependence for dimensions `i` and `j` with - respect to the objective function. The objective function is - approximated by `result.model.` - - Pairwise scatter plots of the points at which the objective - function was directly evaluated are shown on the off-diagonal. - A red point indicates per default the best observed minimum, but + minimum='result', n_minimum_search=None, dim_labels=None): + """Plot a 2-d matrix with so-called Partial Dependence plots + of the objective function. This shows the influence of each + search-space dimension on the objective function. + + This uses the last fitted model for estimating the objective function. + + The diagonal shows the effect of a single dimension on the + objective function, while the plots below the diagonal show + the effect on the objective function when varying two dimensions. + + The Partial Dependence is calculated by averaging the objective value + for a number of random samples in the search-space, + while keeping one or two dimensions fixed at regular intervals. This + averages out the effect of varying the other dimensions and shows + the influence of one or two dimensions on the objective function. + + Also shown are small black dots for the points that were sampled + during optimization. + + A red star indicates per default the best observed minimum, but this can be changed by changing argument ´minimum´. + .. note:: + The Partial Dependence plot is only an estimation of the surrogate + model which in turn is only an estimation of the true objective + function that has been optimized. This means the plots show + an "estimate of an estimate" and may therefore be quite imprecise, + especially if few samples have been collected during the + optimization + (e.g. less than 100-200 samples), and in regions of the search-space + that have been sparsely sampled (e.g. regions away from the optimum). + This means that the plots may change each time you run the + optimization and they should not be considered completely reliable. + These compromises are necessary because we cannot evaluate the + expensive objective function in order to plot it, so we have to use + the cheaper surrogate model to plot its contour. And in order to + show search-spaces with 3 dimensions or more in a 2-dimensional + plot, + we further need to map those dimensions to only 2-dimensions using + the Partial Dependence, which also causes distortions in the plots. + Parameters ---------- result : `OptimizeResult` - The result for which to create the scatter plot matrix. + The optimization results from calling e.g. `gp_minimize()`. levels : int, default=10 Number of levels to draw on the contour plot, passed directly @@ -405,7 +438,12 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, Scale to use for the z axis of the contour plots. Either 'linear' or 'log'. - dimensions : list of str, default=None + dimensions : list(str), list(int), default=None + List of names or indices for search-space dimensions to be + used in the plot. + If `None` then use all dimensions from the search-space. + + dim_labels : list of str, default=None Labels of the dimension variables. `None` defaults to `space.dimensions[i].name`, or if also `None` to `['X_0', 'X_1', ..]`. @@ -457,8 +495,8 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, Returns ------- - ax : `Axes` - The matplotlib axes. + ax : `Matplotlib.Axes` + A 2-d matrix of Axes-objects with the sub-plots. 
""" # Here we define the values for which to plot the red dot (2d plot) and # the red dotted line (1d plot). @@ -469,6 +507,18 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, if space.n_dims == 1: raise ValueError("plot_objective needs at least two" "variables. Found only one.") + # Get the relevant search-space dimensions. + if dimensions is None: + # Get all dimensions. + dimensions = [] + for row in range(space.n_dims): + dimensions.append((row, space.dimensions[row])) + else: + dimensions = space[dimensions] + # Number of search-space dimensions we are using. + n_dims = len(dimensions) + if dim_labels is not None: + assert len(dim_labels) == n_dims x_vals = _evaluate_min_params(result, minimum, n_minimum_search) if sample_source == "random": x_eval = None @@ -487,16 +537,18 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, raise ValueError("Valid values for zscale are 'linear' and 'log'," " not '%s'." % zscale) - fig, ax = plt.subplots(space.n_dims, space.n_dims, - figsize=(size * space.n_dims, size * space.n_dims)) + fig, ax = plt.subplots(n_dims, n_dims, + figsize=(size * n_dims, size * n_dims)) fig.subplots_adjust(left=0.05, right=0.95, bottom=0.05, top=0.95, hspace=0.1, wspace=0.1) - for i in range(space.n_dims): - for j in range(space.n_dims): + for i in range(n_dims): + for j in range(n_dims): if i == j: - xi, yi = partial_dependence_1D(space, result.models[-1], i, + index, dim = dimensions[i] + xi, yi = partial_dependence_1D(space, result.models[-1], + index, samples=samples, n_points=n_points) @@ -505,48 +557,65 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, # lower triangle elif i > j: + index1, dim1 = dimensions[i] + index2, dim2 = dimensions[j] xi, yi, zi = partial_dependence_2D(space, result.models[-1], - i, j, + index1, index2, samples, n_points) ax[i, j].contourf(xi, yi, zi, levels, locator=locator, cmap='viridis_r') - ax[i, j].scatter(x_samples[:, j], x_samples[:, i], + ax[i, j].scatter(x_samples[:, index2], x_samples[:, index1], c='k', s=10, lw=0.) - ax[i, j].scatter(minimum[j], minimum[i], - c=['r'], s=20, lw=0.) + ax[i, j].scatter(minimum[index2], minimum[index1], + c=['r'], s=100, lw=0., marker='*') ylabel = "Partial dependence" + + # Make various adjustments to the plots. + # _adjust_fig(fig=fig, ax=ax, space=space, + # dimensions=dimensions, ylabel=ylabel) + # return ax + return _format_scatter_plot_axes(ax, space, ylabel=ylabel, - dim_labels=dimensions) + dimensions=dimensions, + dim_labels=dim_labels) + +def plot_evaluations(result, bins=20, dimensions=None, dim_labels=None): + """Visualize the order in which points were sampled during optimization. + + This creates a 2-d matrix plot where the diagonal plots are histograms + that show the distribution of samples for each search-space dimension. -def plot_evaluations(result, bins=20, dimensions=None): - """Visualize the order in which points where sampled. + The plots below the diagonal are scatter-plots of the samples for + all combinations of search-space dimensions. - The scatter plot matrix shows at which points in the search - space and in which order samples were evaluated. Pairwise - scatter plots are shown on the off-diagonal for each - dimension of the search space. The order in which samples + The order in which samples were evaluated is encoded in each point's color. - The diagonal shows a histogram of sampled values for each - dimension. A red point indicates the found minimum. 
+ + A red star shows the best found parameters. Parameters ---------- result : `OptimizeResult` - The result for which to create the scatter plot matrix. + The optimization results from calling e.g. `gp_minimize()`. bins : int, bins=20 Number of bins to use for histograms on the diagonal. - dimensions : list of str, default=None + dimensions : list(str), list(int), default=None + List of names or indices for search-space dimensions to be + used in the plot. + If `None` then use all dimensions from the search-space. + + dim_labels : list of str, default=None Labels of the dimension variables. `None` defaults to `space.dimensions[i].name`, or if also `None` to `['X_0', 'X_1', ..]`. Returns ------- - ax : `Axes` - The matplotlib axes. + ax : `Matplotlib.Axes` + A 2-d matrix of Axes-objects with the sub-plots. """ space = result.space # Convert categoricals to integers, so we can ensure consistent ordering. @@ -555,35 +624,56 @@ def plot_evaluations(result, bins=20, dimensions=None): # and may order categoricals differently in different plots anyway. samples, minimum, iscat = _map_categories(space, result.x_iters, result.x) order = range(samples.shape[0]) - fig, ax = plt.subplots(space.n_dims, space.n_dims, - figsize=(2 * space.n_dims, 2 * space.n_dims)) + + if dimensions is None: + # Get all dimensions. + dimensions = [] + for row in range(space.n_dims): + dimensions.append((row, space.dimensions[row])) + else: + dimensions = space[dimensions] + # Number of search-space dimensions we are using. + n_dims = len(dimensions) + if dim_labels is not None: + assert len(dim_labels) == n_dims + + fig, ax = plt.subplots(n_dims, n_dims, + figsize=(2 * n_dims, 2 * n_dims)) fig.subplots_adjust(left=0.05, right=0.95, bottom=0.05, top=0.95, hspace=0.1, wspace=0.1) - for i in range(space.n_dims): - for j in range(space.n_dims): + for i in range(n_dims): + for j in range(n_dims): if i == j: + index, dim = dimensions[i] if iscat[j]: - bins_ = len(space.dimensions[j].categories) - elif space.dimensions[j].prior == 'log-uniform': - low, high = space.bounds[j] + bins_ = len(dim.categories) + elif dim.prior == 'log-uniform': + low, high = space.bounds[index] bins_ = np.logspace(np.log10(low), np.log10(high), bins) else: bins_ = bins ax[i, i].hist( - samples[:, j], bins=bins_, - range=None if iscat[j] else space.dimensions[j].bounds) + samples[:, index], bins=bins_, + range=None if iscat[j] else dim.bounds) # lower triangle elif i > j: - ax[i, j].scatter(samples[:, j], samples[:, i], + index_i, dim_i = dimensions[i] + index_j, dim_j = dimensions[j] + ax[i, j].scatter(samples[:, index_j], samples[:, index_i], c=order, s=40, lw=0., cmap='viridis') - ax[i, j].scatter(minimum[j], minimum[i], - c=['r'], s=20, lw=0.) + ax[i, j].scatter(minimum[index_j], minimum[index_i], + c=['r'], s=100, lw=0., marker='*') + # Make various adjustments to the plots. + # _adjust_fig(fig=fig, ax=ax, space=space, + # dimensions=dimensions, ylabel="Sample Count") + # return ax return _format_scatter_plot_axes(ax, space, ylabel="Number of samples", - dim_labels=dimensions) + dimensions=dimensions, + dim_labels=dim_labels) def _get_ylim_diagonal(ax): @@ -762,7 +852,8 @@ def _map_bins(bins, bounds, prior, categories=None): Number of bins in the histogram. bounds : (int, int) - Tuple or list with lower- and upper-bounds for a search-space dimension. + Tuple or list with lower- and upper-bounds for a search-space + dimension. 
prior : str or None If 'log-uniform' then use log-scaling for the bins, @@ -969,11 +1060,11 @@ def plot_objective_2D(result, dimension_name1, dimension_name2, result : `OptimizeResult` The optimization results e.g. from calling `gp_minimize()`. - dimension_name1 : str - Name of a dimension in the search-space. + dimension_name1 : str or int + Name or index of a dimension in the search-space. - dimension_name2 : str - Name of a dimension in the search-space. + dimension_name2 : str or int + Name or index of a dimension in the search-space. n_samples : int, default=250 Number of random samples used for estimating the contour-plot @@ -990,15 +1081,15 @@ def plot_objective_2D(result, dimension_name1, dimension_name2, Scale to use for the z axis of the contour plots. Either 'log' or linear for all other choices. + ax : `Matplotlib.Axes`, default: None + When set, everything is plotted inside this axis. + Returns ------- - fig : `Matplotlib.Figure` - The Matplotlib Figure-object. - For example, you can save the plot by calling `fig.savefig('file.png')` - ax : `Matplotlib.Axes` The Matplotlib Figure-object. - For example, you can save the plot by calling `fig.savefig('file.png')` + For example, you can save the plot by calling + `fig.savefig('file.png')` """ # Get the search-space instance from the optimization results. @@ -1078,8 +1169,8 @@ def plot_histogram(result, dimension_name, bins=20, rotate_labels=0, ax=None): result : `OptimizeResult` The optimization results e.g. from calling `gp_minimize()`. - dimension_name : str - Name of a dimension in the search-space. + dimension_name : str or int + Name or index of a dimension in the search-space. bins : int, bins=20 Number of bins in the histogram. @@ -1156,346 +1247,6 @@ def plot_histogram(result, dimension_name, bins=20, rotate_labels=0, ax=None): return ax -def plot_objective_new(result, levels=10, n_points=40, n_samples=250, - zscale='linear', dimension_names=None): - """ - Plot a 2-d matrix with so-called Partial Dependence plots - of the objective function. This shows the influence of each - search-space dimension on the objective function. - - This uses the last fitted model for estimating the objective function. - - The diagonal shows the effect of a single dimension on the - objective function, while the plots below the diagonal show - the effect on the objective function when varying two dimensions. - - The Partial Dependence is calculated by averaging the objective value - for a number of random samples in the search-space, - while keeping one or two dimensions fixed at regular intervals. This - averages out the effect of varying the other dimensions and shows - the influence of one or two dimensions on the objective function. - - Also shown are small black dots for the points that were sampled - during optimization, and large red stars show the best found points. - - NOTE: The Partial Dependence plot is only an estimation of the surrogate - model which in turn is only an estimation of the true objective - function that has been optimized. This means the plots show - an "estimate of an estimate" and may therefore be quite imprecise, - especially if few samples have been collected during the optimization - (e.g. less than 100-200 samples), and in regions of the search-space - that have been sparsely sampled (e.g. regions away from the optimum). - This means that the plots may change each time you run the - optimization and they should not be considered completely reliable. 
- These compromises are necessary because we cannot evaluate the - expensive objective function in order to plot it, so we have to use - the cheaper surrogate model to plot its contour. And in order to - show search-spaces with 3 dimensions or more in a 2-dimensional plot, - we further need to map those dimensions to only 2-dimensions using - the Partial Dependence, which also causes distortions in the plots. - - NOTE: Search-spaces with `Categorical` dimensions are not supported. - - NOTE: This function can be very slow for dimensions greater than 5. - - Parameters - ---------- - result : `OptimizeResult` - The optimization results from calling e.g. `gp_minimize()`. - - levels : int, default=10 - Number of levels to draw on the contour plot, passed directly - to `plt.contour()`. - - n_points : int, default=40 - Number of points along each dimension where the partial dependence - is evaluated when generating the contour-plots. - - n_samples : int, default=250 - Number of points along each dimension where the partial dependence - is evaluated when generating the contour-plots. - - zscale : str, default='linear' - Scale to use for the z-axis of the contour plots. - Either 'log' or linear for all other choices. - - dimension_names : list(str), default=None - List of names for search-space dimensions to be used in the plot. - You can omit `Categorical` dimensions here as they are not supported. - If `None` then use all dimensions from the search-space. - - sample_source : str or list of floats, default='random' - Defines to samples generation to use for averaging the model function - at each of the `n_points`. - - A partial dependence plot is only generated, when `sample_source` - is set to 'random' and `n_samples` is sufficient. - - `sample_source` can also be a list of - floats, which is then used for averaging. - - Valid strings: - - - 'random' - `n_samples` random samples will used - - - 'result' - Use only the best observed parameters - - - 'expected_minimum' - Parameters that gives the best - minimum Calculated using scipy's minimize method. - This method currently does not work with categorical values. - - - 'expected_minimum_random' - Parameters that gives the - best minimum when using naive random sampling. - Works with categorical values. - - minimum : str or list of floats, default = 'result' - Defines the values for the red points in the plots. - Valid strings: - - - 'result' - Use best observed parameters - - - 'expected_minimum' - Parameters that gives the best - minimum Calculated using scipy's minimize method. - This method currently does not work with categorical values. - - - 'expected_minimum_random' - Parameters that gives the - best minimum when using naive random sampling. - Works with categorical values - - n_minimum_search : int, default = None - Determines how many points should be evaluated - to find the minimum when using 'expected_minimum' or - 'expected_minimum_random'. Parameter is used when - `sample_source` and/or `minimum` is set to - 'expected_minimum' or 'expected_minimum_random'. - - Returns - ------- - fig : `Matplotlib.Figure` - The object for the figure. - For example, call `fig.savefig('plot.png')` to save the plot. - - ax : `Matplotlib.Axes` - A 2-d matrix of Axes-objects with the sub-plots. - """ - - # Scale for the z-axis of the contour-plot. Either Log or Linear (None). - locator = LogLocator() if zscale == 'log' else None - - # Get the search-space instance from the optimization results. 
- space = result.space - - # Get the relevant search-space dimensions. - if dimension_names is None: - # Get all dimensions. - dimensions = [] - for row in range(space.n_dims): - dimensions.append((row, space.dimensions[row])) - else: - dimensions = space[dimension_names] - - - # Number of search-space dimensions we are using. - n_dims = len(dimensions) - - # Get the last fitted model for the search-space. - last_model = result.models[-1] - - # Get new random samples from the search-space and transform if necessary. - new_samples = space.rvs(n_samples=n_samples) - new_samples = space.transform(new_samples) - - # Create a figure for plotting a 2-d matrix of sub-plots. - fig, ax = plt.subplots(n_dims, n_dims, figsize=(2*n_dims, 2*n_dims)) - - # For all rows in the 2-d plot matrix. - for row in range(n_dims): - # Get the search-space dimension for this row. - index_row, dim_row = dimensions[row] - - # Get the samples from the optimization-log for this dimension. - samples_row = [x[index_row] for x in result.x_iters] - - # Get the best-found sample for this dimension. - best_sample_row = result.x[index_row] - - # Calculate partial dependence for this dimension. - xi, yi = partial_dependence_1D(space, last_model, - index_row, - samples=new_samples, - n_points=n_points) - - # Reference to the plot for the diagonal of this row. - a = ax[row, row] - - # TODO: There is a problem here if yi is very large, then matplotlib - # TODO: writes a number above the plot that I don't know how to turn off. - # Plot the partial dependence for this dimension. - a.plot(xi, yi) - - # Plot a dashed line for the best-found parameter. - a.axvline(best_sample_row, linestyle="--", color="red", lw=1) - - # For all columns until the diagonal in the 2-d plot matrix. - for col in range(row): - # Get the search-space dimension for this column. - index_col, dim_col = dimensions[col] - - # Get the samples from the optimization-log for that dimension. - samples_col = [x[index_col] for x in result.x_iters] - - # Get the best-found sample for this dimension. - best_sample_col = result.x[index_col] - - # Calculate the partial dependence for these two dimensions. - # Note that column and row are switched here. - xi, yi, zi = partial_dependence_2D(space, last_model, - index_col, index_row, - samples=new_samples, - n_points=n_points) - - # Reference to the plot for this row and column. - a = ax[row, col] - - # Plot the contour landscape for the objective function. - a.contourf(xi, yi, zi, levels, locator=locator, cmap='viridis_r') - - # Plot all the parameters that were sampled during optimization. - # These are plotted as small black dots. - a.scatter(samples_col, samples_row, c='black', s=10, lw=0.) - - # Plot the best parameters that were sampled during optimization. - # These are plotted as a big red star. - a.scatter(best_sample_col, best_sample_row, - c='red', s=100, lw=0., marker='*') - - # Make various adjustments to the plots. - #_adjust_fig(fig=fig, ax=ax, space=space, - # dimensions=dimensions, ylabel="Partial Dependence") - - return fig, ax - - -def plot_evaluations_new(result, bins=20, dimension_names=None): - """ - Visualize the order in which points were sampled during optimization. - - This creates a 2-d matrix plot where the diagonal plots are histograms - that show the distribution of samples for each search-space dimension. - - The plots below the diagonal are scatter-plots of the samples for - all combinations of search-space dimensions. - - The ordering of the samples are shown as different colour-shades. 
- - A red star shows the best found parameters. - - Parameters - ---------- - result : `OptimizeResult` - The optimization results from calling e.g. `gp_minimize()`. - - bins : int, bins=20 - Number of bins to use for histograms on the diagonal. - - dimension_names : list of str, default=None - List of names for search-space dimensions to be used in the plot. - If `None` then use all dimensions from the search-space. - - Returns - ------- - fig : `Matplotlib.Figure` - The object for the figure. - For example, call `fig.savefig('plot.png')` to save the plot. - - ax : `Matplotlib.Axes` - A 2-d matrix of Axes-objects with the sub-plots. - """ - - # Get the search-space instance from the optimization results. - space = result.space - # Convert categoricals to integers, so we can ensure consistent ordering. - # Assign indices to categories in the order they appear in the Dimension. - # Matplotlib's categorical plotting functions are only present in v 2.1+, - # and may order categoricals differently in different plots anyway. - samples, minimum, iscat = _map_categories(space, result.x_iters, result.x) - dimension_names = space.dimension_names - # Get the relevant search-space dimensions. - if dimension_names is None: - # Get all dimensions. - dimensions = [] - for row in range(space.n_dims): - dimensions.append((row, space.dimensions[row])) - else: - dimensions = space[dimension_names] - - # Number of search-space dimensions we are using. - n_dims = len(dimensions) - - # Create a figure for plotting a 2-d matrix of sub-plots. - fig, ax = plt.subplots(n_dims, n_dims, figsize=(2 * n_dims, 2 * n_dims)) - - # Used to plot colour-shades for the sample-ordering. - # It is just a range from 0 to the number of samples. - sample_order = range(len(result.x_iters)) - - # For all rows in the 2-d plot matrix. - for row in range(n_dims): - # Get the search-space dimension for this row. - index_row, dim_row = dimensions[row] - - # Get the samples from the optimization-log for this dimension. - samples_row = [x[index_row] for x in result.x_iters] - - # Get the best-found sample for this dimension. - best_sample_row = result.x[index_row] - - if iscat[row]: - categories = dim_row.categories - bounds_row = None - else: - categories = None - # Search-space boundary for this dimension. - bounds_row = dim_row.bounds - - # Map the number of bins to a log-space if necessary. - bins_mapped = _map_bins(bins=bins, - bounds=dim_row.bounds, - prior=dim_row.prior, - categories=categories) - - # Plot a histogram on the diagonal. - ax[row, row].hist(samples_row, bins=bins_mapped, range=bounds_row) - - # For all columns until the diagonal in the 2-d plot matrix. - for col in range(row): - # Get the search-space dimension for this column. - index_col, dim_col = dimensions[col] - - # Get the samples from the optimization-log for that dimension. - samples_col = [x[index_col] for x in result.x_iters] - - # Plot all the parameters that were sampled during optimization. - # These are plotted as small coloured dots, where the colour-shade - # indicates the time-progression. - ax[row, col].scatter(samples_col, samples_row, - c=sample_order, s=40, lw=0., cmap='viridis') - - # Get the best-found sample for this dimension. - best_sample_col = result.x[index_col] - - # Plot the best parameters that were sampled during optimization. - # These are plotted as a big red star. - ax[row, col].scatter(best_sample_col, best_sample_row, - c='red', s=100, lw=0., marker='*') - - # Make various adjustments to the plots. 
- _adjust_fig(fig=fig, ax=ax, space=space, - dimensions=dimensions, ylabel="Sample Count") - - return fig, ax - - def _map_categories(space, points, minimum): """ Map categorical values to integers in a set of points. @@ -1602,7 +1353,7 @@ def _evaluate_min_params(result, params='result', elif params == 'expected_minimum_random': # Do a minimum search by evaluating the function with # n_samples sample values - if n_minimum_search: + if n_minimum_search is not None: # If a value for # n_minimum_samples has been parsed x_vals, _ = expected_minimum_random_sampling( @@ -1612,9 +1363,12 @@ def _evaluate_min_params(result, params='result', else: # Use standard of 10^n_parameters. Note this # becomes very slow for many parameters + n_minimum_search = 10 ** len(result.x) + if n_minimum_search > 100000: + n_minimum_search = 100000 x_vals, _ = expected_minimum_random_sampling( result, - n_random_starts=10 ** len(result.x), + n_random_starts=n_minimum_search, random_state=random_state) else: raise ValueError('Argument ´eval_min_params´ must be a valid' diff --git a/skopt/tests/test_plots.py b/skopt/tests/test_plots.py index f77674ce4..b302d7469 100644 --- a/skopt/tests/test_plots.py +++ b/skopt/tests/test_plots.py @@ -105,6 +105,8 @@ def objective(params): plots.plot_objective(res, sample_source='result') plots.plot_regret(res) + plots.plot_objective_2D(res, 0, 4) + plots.plot_histogram(res, 0, 4) # TODO: Compare plots to known good results? # Look into how matplotlib does this. From 4c8ffb2b77e41cfd2cee99f1e0718326d9cac513 Mon Sep 17 00:00:00 2001 From: holgern Date: Thu, 27 Feb 2020 20:46:18 +0100 Subject: [PATCH 169/265] Cleanup not used functions and pep8 fixes --- skopt/plots.py | 199 ++------------------------------------ skopt/tests/test_space.py | 5 +- 2 files changed, 12 insertions(+), 192 deletions(-) diff --git a/skopt/plots.py b/skopt/plots.py index 57b126497..d7ed53da3 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -4,7 +4,8 @@ from itertools import count from functools import partial from scipy.optimize import OptimizeResult - +from skopt.space import Categorical +from collections import Counter from skopt import expected_minimum, expected_minimum_random_sampling from .space import Categorical @@ -19,9 +20,6 @@ from matplotlib.ticker import LogLocator from matplotlib.ticker import MaxNLocator, FuncFormatter # noqa: E402 -from skopt.space import Categorical -from collections import Counter - def plot_convergence(*args, **kwargs): """Plot one or several convergence traces. @@ -571,10 +569,6 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, ylabel = "Partial dependence" # Make various adjustments to the plots. - # _adjust_fig(fig=fig, ax=ax, space=space, - # dimensions=dimensions, ylabel=ylabel) - # return ax - return _format_scatter_plot_axes(ax, space, ylabel=ylabel, dimensions=dimensions, dim_labels=dim_labels) @@ -668,9 +662,6 @@ def plot_evaluations(result, bins=20, dimensions=None, dim_labels=None): c=['r'], s=100, lw=0., marker='*') # Make various adjustments to the plots. - # _adjust_fig(fig=fig, ax=ax, space=space, - # dimensions=dimensions, ylabel="Sample Count") - # return ax return _format_scatter_plot_axes(ax, space, ylabel="Number of samples", dimensions=dimensions, dim_labels=dim_labels) @@ -711,181 +702,11 @@ def _get_ylim_diagonal(ax): return ylim_diagonal -def _adjust_fig(fig, ax, space, ylabel, dimensions): - """ - Process and adjust a 2-dimensional plot-matrix in various ways, - by writing axis-labels, etc. 
- - This is used by plot_objective() and plot_evaluations(). - - Parameters - ---------- - fig : `Matplotlib.Figure` - Figure-object for the plots. - - ax : `Matplotlib.Axes` - 2-dimensional matrix with Matplotlib Axes objects. - - space : `Space` - Search-space object. - - ylabel : `str` - String to be printed on the top-left diagonal plot - e.g. 'Sample Count'. - - dimensions : `list(Dimension)` - List of `Dimension` objects used in the plots. - - Returns - ------- - * Nothing. - """ - - # Adjust spacing of the figure. - # This looks bad on some outputs so it has been disabled for now. - # fig.subplots_adjust(left=0.05, right=0.95, bottom=0.05, top=0.95, - # hspace=0.1, wspace=0.1) - - # Get min/max ylim for the diagonal plots, used to normalize their y-axis. - ylim_diagonal = _get_ylim_diagonal(ax=ax) - - # The following for-loops process the sub-plots inside the 2-d matrix. - # This could perhaps also be implemented using other Python tricks, - # but these for-loops are probably much easier to understand. - # Similarly, they have been separated into several for-loops to make - # them easier to understand and modify. - - # Number of search-space dimensions used in this plot. - n_dims = len(dimensions) - - # Process the plots on the diagonal. - for row in range(n_dims): - # Get the search-space dimension for this row. - index, dim = dimensions[row] - - # Reference to the diagonal plot for this row. - a = ax[row, row] - - # Write the dimension-name as a label on top of the diagonal plot. - a.xaxis.set_label_position('top') - a.set_xlabel(dim.name) - - # Set the x-axis limits to correspond to the search-space bounds. - a.set_xlim(dim.bounds) - - # Use a common limit for the y-axis on all diagonal plots. - a.set_ylim(ylim_diagonal) - - # Use log-scale on the x-axis? - if dim.prior == 'log-uniform': - a.set_xscale('log') - - # Process the plots below the diagonal. - for row in range(n_dims): - # Get the search-space dimension for this row. - index_row, dim_row = dimensions[row] - - # Only iterate until the diagonal. - for col in range(row): - # Get the search-space dimension for this column. - index_col, dim_col = dimensions[col] - - # Reference to the plot for this row and column. - a = ax[row, col] - - # Plot a grid. - a.grid(True) - - # Set the plot-limits to correspond to the search-space bounds. - a.set_xlim(dim_col.bounds) - a.set_ylim(dim_row.bounds) - - # Use log-scale on the x-axis? - if dim_col.prior == 'log-uniform': - a.set_xscale('log') - - # Use log-scale on the y-axis? - if dim_row.prior == 'log-uniform': - a.set_yscale('log') - - # Turn off all plots to the upper-right of the diagonal. - for row in range(n_dims): - for col in range(row+1, n_dims): - ax[row, col].axis("off") - - # Set the designated ylabel for the top-left plot. - row = col = 0 - ax[row, col].set_ylabel(ylabel) - - # Set the dimension-names for the left-most column. - col = 0 - for row in range(1, n_dims): - ax[row, col].set_ylabel(dimensions[row][1].name) - - # Set the dimension-names for the bottom row. - row = n_dims - 1 - for col in range(0, n_dims): - ax[row, col].set_xlabel(dimensions[col][1].name) - - # Remove the y-tick labels for all plots except the left-most column. - for row in range(n_dims): - for col in range(1, n_dims): - ax[row, col].set_yticklabels([]) - - # Remove the x-tick labels for all plots except the bottom row. 
-    for row in range(n_dims-1):
-        for col in range(n_dims):
-            ax[row, col].set_xticklabels([])
-
-
-def _map_bins(bins, bounds, prior, categories=None):
-    """
-    For use when plotting histograms.
-    Maps the number of bins to a log-scale between the bounds, if necessary.
-
-    Parameters
-    ----------
-    bins : int
-        Number of bins in the histogram.
-
-    bounds : (int, int)
-        Tuple or list with lower- and upper-bounds for a search-space
-        dimension.
-
-    prior : str or None
-        If 'log-uniform' then use log-scaling for the bins,
-        otherwise use the original number of bins.
-
-    Returns
-    -------
-    bins_mapped : int or np.array(int)
-        Number of bins for a histogram if no mapping,
-        or a log-scaled array of bin-points if mapping is needed.
-    """
-    if categories is not None:
-        bins_mapped = len(categories)
-    elif prior == 'log-uniform':
-        # Map the number of bins to a log-space for the dimension bounds.
-        bounds_log = np.log10(bounds)
-        bins_mapped = np.logspace(bounds_log[0], bounds_log[1], bins)
-
-        # Note that Python 3.X supports the following, but not Python 2.7
-        # bins_mapped = np.logspace(*np.log10(bounds), bins)
-    else:
-        # Use the original number of bins.
-        bins_mapped = bins
-
-    return bins_mapped
-
-
 def partial_dependence_1D(space, model, i, samples,
                           n_points=40):
     """
     Calculate the partial dependence for a single dimension.
-    
+
     This uses the given model to calculate the average objective value
     for all the samples, where the given dimension is fixed at
     regular intervals between its bounds.
@@ -1053,8 +874,6 @@ def plot_objective_2D(result, dimension_name1, dimension_name2,
     This is similar to `plot_objective()` but only for 2 dimensions
     whose doc-string also has a more extensive explanation.
 
-    NOTE: Categorical dimensions are not supported.
-
     Parameters
     ----------
     result : `OptimizeResult`
@@ -1227,12 +1046,12 @@ def plot_histogram(result, dimension_name, bins=20, rotate_labels=0, ax=None):
     else:
         # Otherwise the search-space Dimension is either integer or float,
         # in which case the histogram can be plotted more easily.
-
-        # Map the number of bins to a log-space if necessary.
-        bins_mapped = _map_bins(bins=bins,
-                                bounds=dimension.bounds,
-                                prior=dimension.prior)
-
+        if dimension.prior == 'log-uniform':
+            # Map the number of bins to a log-space for the dimension bounds.
+            bins_mapped = np.logspace(*np.log10(dimension.bounds), bins)
+        else:
+            # Use the original number of bins.
+            bins_mapped = bins
         # Plot the histogram.
         ax.hist(samples, bins=bins_mapped, range=dimension.bounds)
 
diff --git a/skopt/tests/test_space.py b/skopt/tests/test_space.py
index 24979bba4..4ef1ad8c2 100644
--- a/skopt/tests/test_space.py
+++ b/skopt/tests/test_space.py
@@ -599,7 +599,8 @@ def test_dimension_name():
     for n in notnames:
         with pytest.raises(ValueError) as exc:
             real = Real(1, 2, name=n)
-            assert("Dimension's name must be either string or None." == exc.value.args[0])
+            assert("Dimension's name must be either string or"
+                   " None." == exc.value.args[0])

     s = Space([Real(1, 2, name="a"),
                Integer(1, 100, name="b"),
                Categorical(["red, blue"], name="c")])
@@ -611,7 +612,7 @@ def test_dimension_name():
     assert s[0, "c"] == [(0, s.dimensions[0]), (2, s.dimensions[2])]
     assert s[0, 2] == [(0, s.dimensions[0]), (2, s.dimensions[2])]
 
-
+
 @pytest.mark.parametrize("dimension",
                          [Real(1, 2), Integer(1, 100),
                           Categorical(["red, blue"])])
 def test_dimension_name_none(dimension):

From 157a8a5e4fc4623e0250bd138de43d35e625aa4b Mon Sep 17 00:00:00 2001
From: holgern
Date: Thu, 27 Feb 2020 20:50:23 +0100
Subject: [PATCH 170/265] pep8 fixes

---
 skopt/plots.py            |  6 ++++--
 skopt/tests/test_plots.py | 15 ---------------
 2 files changed, 4 insertions(+), 17 deletions(-)

diff --git a/skopt/plots.py b/skopt/plots.py
index d7ed53da3..e0d953cc4 100644
--- a/skopt/plots.py
+++ b/skopt/plots.py
@@ -740,7 +740,8 @@ def partial_dependence_1D(space, model, i, samples,
         The points at which the partial dependence was evaluated.
 
     yi : np.array
-        The average value of the modelled objective function at each point `xi`.
+        The average value of the modelled objective function at
+        each point `xi`.
     """
     # The idea is to step through one dimension, evaluating the model with
     # that dimension fixed and averaging either over random values or over
@@ -837,6 +838,7 @@ def partial_dependence_2D(space, model, i, j, samples,
     # This is useful when we are using one-hot encoding, i.e. using
     # categorical values
     dim_locs = np.cumsum([0] + [d.transformed_size for d in space.dimensions])
+
     def _calc(x, y):
         """
         Helper-function to calculate the average predicted
@@ -873,7 +875,7 @@ def plot_objective_2D(result, dimension_name1, dimension_name2,
 
     This is similar to `plot_objective()` but only for 2 dimensions
     whose doc-string also has a more extensive explanation.
-    
+
     Parameters
     ----------
     result : `OptimizeResult`
diff --git a/skopt/tests/test_plots.py b/skopt/tests/test_plots.py
index b302d7469..fcea42aa8 100644
--- a/skopt/tests/test_plots.py
+++ b/skopt/tests/test_plots.py
@@ -47,11 +47,6 @@ def objective(params):
     samples = res.space.transform(res.space.rvs(n_samples=40, random_state=3))
     xi_ = [1., 10.5, 20.]
yi_ = [-0.9194979634067544, -0.9194802312964899, -0.9194601855985786] - xi, yi = partial_dependence(res.space, res.models[-1], 0, - sample_points=samples, n_points=3) - assert_array_almost_equal(xi, xi_) - assert_array_almost_equal(yi, yi_) - xi, yi = partial_dependence_1D(res.space, res.models[-1], 0, samples, n_points=3) assert_array_almost_equal(xi, xi_) @@ -59,11 +54,6 @@ def objective(params): xi_ = [0, 1] yi_ = [-0.919544265279874, -0.919428904254748] - xi, yi = partial_dependence(res.space, res.models[-1], 4, - sample_points=samples, n_points=3) - assert_array_almost_equal(xi, xi_) - assert_array_almost_equal(yi, yi_) - xi, yi = partial_dependence_1D(res.space, res.models[-1], 4, samples, n_points=3) assert_array_almost_equal(xi, xi_) @@ -74,11 +64,6 @@ def objective(params): zi_ = [[-0.91956104, -0.91944569], [-0.91954331, -0.91942795], [-0.91952327, -0.91940791]] - xi, yi, zi = partial_dependence(res.space, res.models[-1], 0, 4, - sample_points=samples, n_points=3) - assert_array_almost_equal(xi, xi_) - assert_array_almost_equal(yi, yi_) - assert_array_almost_equal(zi, zi_) xi, yi, zi = partial_dependence_2D(res.space, res.models[-1], 0, 4, samples, n_points=3) assert_array_almost_equal(xi, xi_) From cfca7302d858f1abcf823468dd9ee2bd8b5b9a15 Mon Sep 17 00:00:00 2001 From: holgern Date: Thu, 27 Feb 2020 20:52:41 +0100 Subject: [PATCH 171/265] Try to solve conflict --- skopt/plots.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skopt/plots.py b/skopt/plots.py index e0d953cc4..a35438ab5 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -4,10 +4,10 @@ from itertools import count from functools import partial from scipy.optimize import OptimizeResult -from skopt.space import Categorical -from collections import Counter + from skopt import expected_minimum, expected_minimum_random_sampling from .space import Categorical +from collections import Counter # For plot tests, matplotlib must be set to headless mode early if 'pytest' in sys.modules: From aa3ef1341802677387f4c43f5d2b9df725886b7f Mon Sep 17 00:00:00 2001 From: holgern Date: Thu, 27 Feb 2020 21:15:15 +0100 Subject: [PATCH 172/265] Fix unit tests --- skopt/tests/test_plots.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/skopt/tests/test_plots.py b/skopt/tests/test_plots.py index fcea42aa8..bb995a8a2 100644 --- a/skopt/tests/test_plots.py +++ b/skopt/tests/test_plots.py @@ -44,16 +44,19 @@ def objective(params): res = gp_minimize(objective, SPACE, n_calls=10, random_state=3) - samples = res.space.transform(res.space.rvs(n_samples=40, random_state=3)) + x = [[11, 52, 8, 14, 'entropy', 'f'], + [14, 90, 10, 2, 'gini', 'a'], + [7, 90, 6, 14, 'entropy', 'f']] + samples = res.space.transform(x) xi_ = [1., 10.5, 20.] - yi_ = [-0.9194979634067544, -0.9194802312964899, -0.9194601855985786] + yi_ = [-0.9240883492576596, -0.9240745890422687, -0.9240586402439884] xi, yi = partial_dependence_1D(res.space, res.models[-1], 0, samples, n_points=3) assert_array_almost_equal(xi, xi_) assert_array_almost_equal(yi, yi_) xi_ = [0, 1] - yi_ = [-0.919544265279874, -0.919428904254748] + yi_ = [-0.9241087603770617, -0.9240188905968352] xi, yi = partial_dependence_1D(res.space, res.models[-1], 4, samples, n_points=3) assert_array_almost_equal(xi, xi_) @@ -61,9 +64,9 @@ def objective(params): xi_ = [0, 1] yi_ = [1., 10.5, 20.] 
-    zi_ = [[-0.91956104, -0.91944569],
-           [-0.91954331, -0.91942795],
-           [-0.91952327, -0.91940791]]
+    zi_ = [[-0.92412562, -0.92403575],
+           [-0.92411186, -0.92402199],
+           [-0.92409591, -0.92400604]]
     xi, yi, zi = partial_dependence_2D(res.space, res.models[-1], 0, 4,
                                        samples, n_points=3)
     assert_array_almost_equal(xi, xi_)

From 7d7deaa485e5a315c933e02e39f8c66bfa79ad28 Mon Sep 17 00:00:00 2001
From: Kyle Gerard Felker
Date: Thu, 27 Feb 2020 15:01:51 -0600
Subject: [PATCH 173/265] Kappa is only used for LCB acquisition function

---
 skopt/acquisition.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/skopt/acquisition.py b/skopt/acquisition.py
index fec941dc9..ba3d409ee 100644
--- a/skopt/acquisition.py
+++ b/skopt/acquisition.py
@@ -112,7 +112,7 @@ def gaussian_lcb(X, model, kappa=1.96, return_grad=False):
         exploration over exploitation and vice versa.
         If set to 'inf', the acquisition function will only use the variance
         which is useful in a pure exploration setting.
-        Useless if ``method`` is set to "LCB".
+        Useless if ``method`` is not set to "LCB".
 
     return_grad : boolean, optional
         Whether or not to return the grad. Implemented only for the case where

From 51aa20395c5a968deef1135c62f92559853c8ceb Mon Sep 17 00:00:00 2001
From: holgern
Date: Thu, 27 Feb 2020 22:24:40 +0100
Subject: [PATCH 174/265] Try to fix unit test

---
 skopt/tests/test_plots.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/skopt/tests/test_plots.py b/skopt/tests/test_plots.py
index bb995a8a2..ff3876ff0 100644
--- a/skopt/tests/test_plots.py
+++ b/skopt/tests/test_plots.py
@@ -53,14 +53,14 @@ def objective(params):
     xi, yi = partial_dependence_1D(res.space, res.models[-1], 0,
                                    samples, n_points=3)
     assert_array_almost_equal(xi, xi_)
-    assert_array_almost_equal(yi, yi_)
+    assert_array_almost_equal(yi, yi_, decimal=3)
 
     xi_ = [0, 1]
     yi_ = [-0.9241087603770617, -0.9240188905968352]
     xi, yi = partial_dependence_1D(res.space, res.models[-1], 4,
                                    samples, n_points=3)
     assert_array_almost_equal(xi, xi_)
-    assert_array_almost_equal(yi, yi_)
+    assert_array_almost_equal(yi, yi_, decimal=3)
 
     xi_ = [0, 1]
     yi_ = [1., 10.5, 20.]
@@ -71,7 +71,7 @@ def objective(params):
                                        samples, n_points=3)
     assert_array_almost_equal(xi, xi_)
     assert_array_almost_equal(yi, yi_)
-    assert_array_almost_equal(zi, zi_)
+    assert_array_almost_equal(zi, zi_, decimal=3)

From 1d13eb7bdafec0e0d248040f81da262ff927db5f Mon Sep 17 00:00:00 2001
From: holgern
Date: Thu, 27 Feb 2020 23:00:46 +0100
Subject: [PATCH 175/265] Rename parameter back, so that plot_objective and
 plot_evaluations have the same API

---
 skopt/plots.py | 117 +++++++++++++++++++++++++------------------------
 1 file changed, 59 insertions(+), 58 deletions(-)

diff --git a/skopt/plots.py b/skopt/plots.py
index a35438ab5..83b047632 100644
--- a/skopt/plots.py
+++ b/skopt/plots.py
@@ -195,29 +195,29 @@ def plot_regret(*args, **kwargs):
     return ax
 
 
-def _format_scatter_plot_axes(ax, space, ylabel, dimensions,
-                              dim_labels=None):
+def _format_scatter_plot_axes(ax, space, ylabel, plot_dims,
+                              dimensions=None):
     # Work out min, max of y axis for the diagonal so we can adjust
     # them all to the same value
     diagonal_ylim = _get_ylim_diagonal(ax)
     diagonal_ylim = tuple(diagonal_ylim)
 
     # Number of search-space dimensions we are using.
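# Note on the rename carried through below: `plot_dims` now selects which
# search-space dimensions are plotted, while `dimensions` only supplies
# their axis labels. A hedged usage sketch (argument values invented):
#
#     plot_objective(result, plot_dims=[0, 1],
#                    dimensions=['learning rate', 'tree depth'])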
- n_dims = len(dimensions) + n_dims = len(plot_dims) - if dim_labels is None: - dim_labels = ["$X_{%i}$" % i if d.name is None else d.name - for i, d in dimensions] + if dimensions is None: + dimensions = ["$X_{%i}$" % i if d.name is None else d.name + for i, d in plot_dims] # Axes for categorical dimensions are really integers; we have to # label them with the category names - iscat = [isinstance(dim[1], Categorical) for dim in dimensions] + iscat = [isinstance(dim[1], Categorical) for dim in plot_dims] # Deal with formatting of the axes for i in range(n_dims): # rows for j in range(n_dims): # columns ax_ = ax[i, j] - index_i, dim_i = dimensions[i] - index_j, dim_j = dimensions[j] + index_i, dim_i = plot_dims[i] + index_j, dim_j = plot_dims[j] if j > i: ax_.axis("off") elif i > j: # off-diagonal plots @@ -232,7 +232,7 @@ def _format_scatter_plot_axes(ax, space, ylabel, dimensions, else: ax_.set_xlim(*dim_j.bounds) if j == 0: # only leftmost column (0) gets y labels - ax_.set_ylabel(dim_labels[i]) + ax_.set_ylabel(dimensions[i]) if iscat[i]: # Set category labels for left column ax_.yaxis.set_major_formatter(FuncFormatter( partial(_cat_format, dim_i))) @@ -245,7 +245,7 @@ def _format_scatter_plot_axes(ax, space, ylabel, dimensions, # ... the bottom row else: [l.set_rotation(45) for l in ax_.get_xticklabels()] - ax_.set_xlabel(dim_labels[j]) + ax_.set_xlabel(dimensions[j]) # configure plot for linear vs log-scale if dim_j.prior == 'log-uniform': @@ -269,7 +269,7 @@ def _format_scatter_plot_axes(ax, space, ylabel, dimensions, ax_.xaxis.tick_top() ax_.xaxis.set_label_position('top') - ax_.set_xlabel(dim_labels[j]) + ax_.set_xlabel(dimensions[j]) if dim_i.prior == 'log-uniform': ax_.set_xscale('log') @@ -370,7 +370,7 @@ def partial_dependence(space, model, i, j=None, sample_points=None, def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, zscale='linear', dimensions=None, sample_source='random', - minimum='result', n_minimum_search=None, dim_labels=None): + minimum='result', n_minimum_search=None, plot_dims=None): """Plot a 2-d matrix with so-called Partial Dependence plots of the objective function. This shows the influence of each search-space dimension on the objective function. @@ -436,16 +436,16 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, Scale to use for the z axis of the contour plots. Either 'linear' or 'log'. - dimensions : list(str), list(int), default=None - List of names or indices for search-space dimensions to be - used in the plot. - If `None` then use all dimensions from the search-space. - - dim_labels : list of str, default=None + dimensions : list of str, default=None Labels of the dimension variables. `None` defaults to `space.dimensions[i].name`, or if also `None` to `['X_0', 'X_1', ..]`. + plot_dims : list(str), list(int), default=None + List of names or indices for search-space dimensions to be + used in the plot. + If `None` then use all dimensions from the search-space. + sample_source : str or list of floats, default='random' Defines to samples generation to use for averaging the model function at each of the `n_points`. @@ -506,17 +506,17 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, raise ValueError("plot_objective needs at least two" "variables. Found only one.") # Get the relevant search-space dimensions. - if dimensions is None: + if plot_dims is None: # Get all dimensions. 
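# (Each entry collected into `plot_dims` pairs a dimension's index in the
#  search space with the Dimension object itself, so later code can recover
#  both the matching column of `x_iters` and the dimension's bounds/prior.)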
- dimensions = [] + plot_dims = [] for row in range(space.n_dims): - dimensions.append((row, space.dimensions[row])) + plot_dims.append((row, space.dimensions[row])) else: - dimensions = space[dimensions] + plot_dims = space[plot_dims] # Number of search-space dimensions we are using. - n_dims = len(dimensions) - if dim_labels is not None: - assert len(dim_labels) == n_dims + n_dims = len(plot_dims) + if dimensions is not None: + assert len(dimensions) == n_dims x_vals = _evaluate_min_params(result, minimum, n_minimum_search) if sample_source == "random": x_eval = None @@ -544,7 +544,7 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, for i in range(n_dims): for j in range(n_dims): if i == j: - index, dim = dimensions[i] + index, dim = plot_dims[i] xi, yi = partial_dependence_1D(space, result.models[-1], index, samples=samples, @@ -555,8 +555,8 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, # lower triangle elif i > j: - index1, dim1 = dimensions[i] - index2, dim2 = dimensions[j] + index1, dim1 = plot_dims[i] + index2, dim2 = plot_dims[j] xi, yi, zi = partial_dependence_2D(space, result.models[-1], index1, index2, samples, n_points) @@ -570,11 +570,12 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, # Make various adjustments to the plots. return _format_scatter_plot_axes(ax, space, ylabel=ylabel, - dimensions=dimensions, - dim_labels=dim_labels) + plot_dims=plot_dims, + dimensions=dimensions) -def plot_evaluations(result, bins=20, dimensions=None, dim_labels=None): +def plot_evaluations(result, bins=20, dimensions=None, + plot_dims=None): """Visualize the order in which points were sampled during optimization. This creates a 2-d matrix plot where the diagonal plots are histograms @@ -596,16 +597,16 @@ def plot_evaluations(result, bins=20, dimensions=None, dim_labels=None): bins : int, bins=20 Number of bins to use for histograms on the diagonal. - dimensions : list(str), list(int), default=None - List of names or indices for search-space dimensions to be - used in the plot. - If `None` then use all dimensions from the search-space. - - dim_labels : list of str, default=None + dimensions : list of str, default=None Labels of the dimension variables. `None` defaults to `space.dimensions[i].name`, or if also `None` to `['X_0', 'X_1', ..]`. + plot_dims : list(str), list(int), default=None + List of names or indices for search-space dimensions to be + used in the plot. + If `None` then use all dimensions from the search-space. + Returns ------- ax : `Matplotlib.Axes` @@ -619,17 +620,17 @@ def plot_evaluations(result, bins=20, dimensions=None, dim_labels=None): samples, minimum, iscat = _map_categories(space, result.x_iters, result.x) order = range(samples.shape[0]) - if dimensions is None: + if plot_dims is None: # Get all dimensions. - dimensions = [] + plot_dims = [] for row in range(space.n_dims): - dimensions.append((row, space.dimensions[row])) + plot_dims.append((row, space.dimensions[row])) else: - dimensions = space[dimensions] + plot_dims = space[plot_dims] # Number of search-space dimensions we are using. 
- n_dims = len(dimensions) - if dim_labels is not None: - assert len(dim_labels) == n_dims + n_dims = len(plot_dims) + if dimensions is not None: + assert len(dimensions) == n_dims fig, ax = plt.subplots(n_dims, n_dims, figsize=(2 * n_dims, 2 * n_dims)) @@ -640,7 +641,7 @@ def plot_evaluations(result, bins=20, dimensions=None, dim_labels=None): for i in range(n_dims): for j in range(n_dims): if i == j: - index, dim = dimensions[i] + index, dim = plot_dims[i] if iscat[j]: bins_ = len(dim.categories) elif dim.prior == 'log-uniform': @@ -654,8 +655,8 @@ def plot_evaluations(result, bins=20, dimensions=None, dim_labels=None): # lower triangle elif i > j: - index_i, dim_i = dimensions[i] - index_j, dim_j = dimensions[j] + index_i, dim_i = plot_dims[i] + index_j, dim_j = plot_dims[j] ax[i, j].scatter(samples[:, index_j], samples[:, index_i], c=order, s=40, lw=0., cmap='viridis') ax[i, j].scatter(minimum[index_j], minimum[index_i], @@ -663,8 +664,8 @@ def plot_evaluations(result, bins=20, dimensions=None, dim_labels=None): # Make various adjustments to the plots. return _format_scatter_plot_axes(ax, space, ylabel="Number of samples", - dimensions=dimensions, - dim_labels=dim_labels) + plot_dims=plot_dims, + dimensions=dimensions) def _get_ylim_diagonal(ax): @@ -863,7 +864,7 @@ def _calc(x, y): return xi, yi, zi -def plot_objective_2D(result, dimension_name1, dimension_name2, +def plot_objective_2D(result, dimension_identifier1, dimension_identifier2, n_points=40, n_samples=250, levels=10, zscale='linear', sample_source='random', minimum='result', n_minimum_search=None, ax=None): @@ -881,10 +882,10 @@ def plot_objective_2D(result, dimension_name1, dimension_name2, result : `OptimizeResult` The optimization results e.g. from calling `gp_minimize()`. - dimension_name1 : str or int + dimension_identifier1 : str or int Name or index of a dimension in the search-space. - dimension_name2 : str or int + dimension_identifier2 : str or int Name or index of a dimension in the search-space. n_samples : int, default=250 @@ -925,8 +926,8 @@ def plot_objective_2D(result, dimension_name1, dimension_name2, samples = space.transform([x_eval]) x_samples, x_minimum, _ = _map_categories(space, result.x_iters, x_vals) # Get the dimension-object, its index in the search-space, and its name. - index1, dimension1 = space[dimension_name1] - index2, dimension2 = space[dimension_name2] + index1, dimension1 = space[dimension_identifier1] + index2, dimension2 = space[dimension_identifier2] # Get the samples from the optimization-log for the relevant dimensions. # samples1 = get_samples_dimension(result=result, index=index1) @@ -979,7 +980,7 @@ def plot_objective_2D(result, dimension_name1, dimension_name2, return ax -def plot_histogram(result, dimension_name, bins=20, rotate_labels=0, ax=None): +def plot_histogram(result, dimension_identifier, bins=20, rotate_labels=0, ax=None): """ Create and return a Matplotlib figure with a histogram of the samples from the optimization results, @@ -990,7 +991,7 @@ def plot_histogram(result, dimension_name, bins=20, rotate_labels=0, ax=None): result : `OptimizeResult` The optimization results e.g. from calling `gp_minimize()`. - dimension_name : str or int + dimension_identifier : str or int Name or index of a dimension in the search-space. bins : int, bins=20 @@ -1010,7 +1011,7 @@ def plot_histogram(result, dimension_name, bins=20, rotate_labels=0, ax=None): space = result.space # Get the dimension-object. 
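# (Indexing a Space with a dimension name or integer returns an
#  (index, Dimension) pair, as the s[0, "c"] assertions in
#  test_dimension_name earlier in this series show, hence the
#  two-value unpacking on the next line.)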
- index, dimension = space[dimension_name] + index, dimension = space[dimension_identifier] # Get the samples from the optimization-log for that particular dimension. samples = [x[index] for x in result.x_iters] From 5d8546c85bfd2bc45746b570757a76cbf95e0cb8 Mon Sep 17 00:00:00 2001 From: holgern Date: Thu, 27 Feb 2020 23:03:37 +0100 Subject: [PATCH 176/265] Fix pep8 --- skopt/plots.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/skopt/plots.py b/skopt/plots.py index 83b047632..0c13ad5e5 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -980,7 +980,8 @@ def plot_objective_2D(result, dimension_identifier1, dimension_identifier2, return ax -def plot_histogram(result, dimension_identifier, bins=20, rotate_labels=0, ax=None): +def plot_histogram(result, dimension_identifier, bins=20, rotate_labels=0, + ax=None): """ Create and return a Matplotlib figure with a histogram of the samples from the optimization results, From 51d1621a3bcef468ed38ab86369ec2c949c9461a Mon Sep 17 00:00:00 2001 From: holgern Date: Thu, 27 Feb 2020 23:22:12 +0100 Subject: [PATCH 177/265] Allow to plot 1D spaces for plot_objective and plot_evaluations * Fixes #740 --- skopt/plots.py | 64 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 25 deletions(-) diff --git a/skopt/plots.py b/skopt/plots.py index 0c13ad5e5..ead9ad37f 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -203,7 +203,10 @@ def _format_scatter_plot_axes(ax, space, ylabel, plot_dims, diagonal_ylim = tuple(diagonal_ylim) # Number of search-space dimensions we are using. - n_dims = len(plot_dims) + if isinstance(ax, list): + n_dims = len(plot_dims) + else: + n_dims = 1 if dimensions is None: dimensions = ["$X_{%i}$" % i if d.name is None else d.name @@ -215,7 +218,10 @@ def _format_scatter_plot_axes(ax, space, ylabel, plot_dims, # Deal with formatting of the axes for i in range(n_dims): # rows for j in range(n_dims): # columns - ax_ = ax[i, j] + if n_dims > 1: + ax_ = ax[i, j] + else: + ax_ = ax index_i, dim_i = plot_dims[i] index_j, dim_j = plot_dims[j] if j > i: @@ -502,9 +508,6 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, # calculating dependence. (Unless partial # dependence is to be used instead). space = result.space - if space.n_dims == 1: - raise ValueError("plot_objective needs at least two" - "variables. Found only one.") # Get the relevant search-space dimensions. if plot_dims is None: # Get all dimensions. @@ -549,23 +552,27 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, index, samples=samples, n_points=n_points) - - ax[i, i].plot(xi, yi) - ax[i, i].axvline(minimum[i], linestyle="--", color="r", lw=1) + if n_dims > 1: + ax_ = ax[i, i] + else: + ax_ = ax + ax_.plot(xi, yi) + ax_.axvline(minimum[i], linestyle="--", color="r", lw=1) # lower triangle elif i > j: index1, dim1 = plot_dims[i] index2, dim2 = plot_dims[j] + ax_ = ax[i, j] xi, yi, zi = partial_dependence_2D(space, result.models[-1], index1, index2, samples, n_points) - ax[i, j].contourf(xi, yi, zi, levels, - locator=locator, cmap='viridis_r') - ax[i, j].scatter(x_samples[:, index2], x_samples[:, index1], - c='k', s=10, lw=0.) - ax[i, j].scatter(minimum[index2], minimum[index1], - c=['r'], s=100, lw=0., marker='*') + ax_.contourf(xi, yi, zi, levels, + locator=locator, cmap='viridis_r') + ax_.scatter(x_samples[:, index2], x_samples[:, index1], + c='k', s=10, lw=0.) 
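# (The black dots plotted above mark every point evaluated during the
#  optimisation; the red star added next marks the estimated minimum.)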
+            ax_.scatter(minimum[index2], minimum[index1],
+                        c=['r'], s=100, lw=0., marker='*')
 
     ylabel = "Partial dependence"
 
     # Make various adjustments to the plots.
@@ -649,18 +656,22 @@ def plot_evaluations(result, bins=20, dimensions=None,
                 bins_ = np.logspace(np.log10(low), np.log10(high), bins)
             else:
                 bins_ = bins
-            ax[i, i].hist(
-                samples[:, index], bins=bins_,
-                range=None if iscat[j] else dim.bounds)
+            if n_dims == 1:
+                ax_ = ax
+            else:
+                ax_ = ax[i, i]
+            ax_.hist(samples[:, index], bins=bins_,
+                     range=None if iscat[j] else dim.bounds)
 
         # lower triangle
         elif i > j:
             index_i, dim_i = plot_dims[i]
             index_j, dim_j = plot_dims[j]
-            ax[i, j].scatter(samples[:, index_j], samples[:, index_i],
-                             c=order, s=40, lw=0., cmap='viridis')
-            ax[i, j].scatter(minimum[index_j], minimum[index_i],
-                             c=['r'], s=100, lw=0., marker='*')
+            ax_ = ax[i, j]
+            ax_.scatter(samples[:, index_j], samples[:, index_i],
+                        c=order, s=40, lw=0., cmap='viridis')
+            ax_.scatter(minimum[index_j], minimum[index_i],
+                        c=['r'], s=100, lw=0., marker='*')
 
     # Make various adjustments to the plots.
     return _format_scatter_plot_axes(ax, space, ylabel="Number of samples",
@@ -685,10 +696,13 @@ def _get_ylim_diagonal(ax):
     """
 
     # Number of search-space dimensions used in this plot.
-    n_dims = len(ax)
-
-    # Get ylim for all diagonal plots.
-    ylim = [ax[row, row].get_ylim() for row in range(n_dims)]
+    if isinstance(ax, list):
+        n_dims = len(ax)
+        # Get ylim for all diagonal plots.
+        ylim = [ax[row, row].get_ylim() for row in range(n_dims)]
+    else:
+        n_dims = 1
+        ylim = [ax.get_ylim()]

From 15240f0d46214b6f9245a15bb32296345d99c7d0 Mon Sep 17 00:00:00 2001
From: holgern
Date: Thu, 27 Feb 2020 23:35:13 +0100
Subject: [PATCH 178/265] Add unit test and fix isinstance check

Add check for #648
---
 skopt/plots.py            | 4 ++--
 skopt/tests/test_plots.py | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/skopt/plots.py b/skopt/plots.py
index ead9ad37f..8a94646ef 100644
--- a/skopt/plots.py
+++ b/skopt/plots.py
@@ -203,7 +203,7 @@ def _format_scatter_plot_axes(ax, space, ylabel, plot_dims,
     diagonal_ylim = tuple(diagonal_ylim)
 
     # Number of search-space dimensions we are using.
-    if isinstance(ax, list):
+    if isinstance(ax, (list, np.ndarray, np.array)):
        n_dims = len(plot_dims)
    else:
        n_dims = 1
@@ -696,7 +696,7 @@ def _get_ylim_diagonal(ax):
     """
 
     # Number of search-space dimensions used in this plot.
-    if isinstance(ax, list):
+    if isinstance(ax, (list, np.ndarray, np.array)):
         n_dims = len(ax)
         # Get ylim for all diagonal plots.
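# Background for the isinstance checks above, assuming standard matplotlib
# behaviour (with the default squeeze=True): plt.subplots(1, 1) returns a
# lone Axes object, while plt.subplots(n, n) for n > 1 returns an n x n
# ndarray of Axes, so a single-dimension plot matrix cannot be indexed
# with ax[row, row].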
ylim = [ax[row, row].get_ylim() for row in range(n_dims)] diff --git a/skopt/tests/test_plots.py b/skopt/tests/test_plots.py index ff3876ff0..18f57ec93 100644 --- a/skopt/tests/test_plots.py +++ b/skopt/tests/test_plots.py @@ -85,6 +85,7 @@ def objective(params): plots.plot_convergence(res) plots.plot_evaluations(res) plots.plot_objective(res) + plots.plot_objective(res, dimensions=["a", "b", "c", "d", "e", "f"]) plots.plot_objective(res, minimum='expected_minimum_random') plots.plot_objective(res, @@ -176,4 +177,4 @@ def objective(x, noise_level=0.1): res = opt.tell(next_x, f_val) # Plot results - assert_raises(ValueError, plots.plot_objective, res) + plots.plot_objective(res) From 059f427809956d6a425f10a511107f418f03c325 Mon Sep 17 00:00:00 2001 From: holgern Date: Thu, 27 Feb 2020 23:38:13 +0100 Subject: [PATCH 179/265] Fix isinstance check --- skopt/plots.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skopt/plots.py b/skopt/plots.py index 8a94646ef..4eabdf696 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -203,7 +203,7 @@ def _format_scatter_plot_axes(ax, space, ylabel, plot_dims, diagonal_ylim = tuple(diagonal_ylim) # Number of search-space dimensions we are using. - if isinstance(ax, (list, np.ndarray, np.array)): + if isinstance(ax, (list, np.ndarray)): n_dims = len(plot_dims) else: n_dims = 1 @@ -696,7 +696,7 @@ def _get_ylim_diagonal(ax): """ # Number of search-space dimensions used in this plot. - if isinstance(ax, (list, np.ndarray, np.array)): + if isinstance(ax, (list, np.ndarray)): n_dims = len(ax) # Get ylim for all diagonal plots. ylim = [ax[row, row].get_ylim() for row in range(n_dims)] From 58d6c62bab971bc55e44442ce6829d9238ae0cf7 Mon Sep 17 00:00:00 2001 From: holgern Date: Fri, 28 Feb 2020 09:58:42 +0100 Subject: [PATCH 180/265] Rename parameter back to old value --- skopt/plots.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/skopt/plots.py b/skopt/plots.py index 4eabdf696..c5476936f 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -196,7 +196,7 @@ def plot_regret(*args, **kwargs): def _format_scatter_plot_axes(ax, space, ylabel, plot_dims, - dimensions=None): + dim_labels=None): # Work out min, max of y axis for the diagonal so we can adjust # them all to the same value diagonal_ylim = _get_ylim_diagonal(ax) @@ -208,8 +208,8 @@ def _format_scatter_plot_axes(ax, space, ylabel, plot_dims, else: n_dims = 1 - if dimensions is None: - dimensions = ["$X_{%i}$" % i if d.name is None else d.name + if dim_labels is None: + dim_labels = ["$X_{%i}$" % i if d.name is None else d.name for i, d in plot_dims] # Axes for categorical dimensions are really integers; we have to # label them with the category names @@ -238,7 +238,7 @@ def _format_scatter_plot_axes(ax, space, ylabel, plot_dims, else: ax_.set_xlim(*dim_j.bounds) if j == 0: # only leftmost column (0) gets y labels - ax_.set_ylabel(dimensions[i]) + ax_.set_ylabel(dim_labels[i]) if iscat[i]: # Set category labels for left column ax_.yaxis.set_major_formatter(FuncFormatter( partial(_cat_format, dim_i))) @@ -251,7 +251,7 @@ def _format_scatter_plot_axes(ax, space, ylabel, plot_dims, # ... 
the bottom row else: [l.set_rotation(45) for l in ax_.get_xticklabels()] - ax_.set_xlabel(dimensions[j]) + ax_.set_xlabel(dim_labels[j]) # configure plot for linear vs log-scale if dim_j.prior == 'log-uniform': @@ -275,7 +275,7 @@ def _format_scatter_plot_axes(ax, space, ylabel, plot_dims, ax_.xaxis.tick_top() ax_.xaxis.set_label_position('top') - ax_.set_xlabel(dimensions[j]) + ax_.set_xlabel(dim_labels[j]) if dim_i.prior == 'log-uniform': ax_.set_xscale('log') From 9a6f3ffbbfa53b077b265452b9de0b0199cf65c3 Mon Sep 17 00:00:00 2001 From: holgern Date: Fri, 28 Feb 2020 10:02:49 +0100 Subject: [PATCH 181/265] Fix wrong parameter --- skopt/plots.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skopt/plots.py b/skopt/plots.py index c5476936f..9e44d6208 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -578,7 +578,7 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, # Make various adjustments to the plots. return _format_scatter_plot_axes(ax, space, ylabel=ylabel, plot_dims=plot_dims, - dimensions=dimensions) + dim_labels=dimensions) def plot_evaluations(result, bins=20, dimensions=None, @@ -676,7 +676,7 @@ def plot_evaluations(result, bins=20, dimensions=None, # Make various adjustments to the plots. return _format_scatter_plot_axes(ax, space, ylabel="Number of samples", plot_dims=plot_dims, - dimensions=dimensions) + dim_labels=dimensions) def _get_ylim_diagonal(ax): From 6d4c93dddc7c493da992f131bc545642d4c28946 Mon Sep 17 00:00:00 2001 From: holgern Date: Fri, 28 Feb 2020 10:09:42 +0100 Subject: [PATCH 182/265] Improve doc strings --- skopt/plots.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/skopt/plots.py b/skopt/plots.py index 9e44d6208..477e18a22 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -200,7 +200,6 @@ def _format_scatter_plot_axes(ax, space, ylabel, plot_dims, # Work out min, max of y axis for the diagonal so we can adjust # them all to the same value diagonal_ylim = _get_ylim_diagonal(ax) - diagonal_ylim = tuple(diagonal_ylim) # Number of search-space dimensions we are using. if isinstance(ax, (list, np.ndarray)): @@ -447,9 +446,9 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, variables. `None` defaults to `space.dimensions[i].name`, or if also `None` to `['X_0', 'X_1', ..]`. - plot_dims : list(str), list(int), default=None - List of names or indices for search-space dimensions to be - used in the plot. + plot_dims : list of str and int, default=None + List of dimension names or dimension indices from the + search-space dimensions to be included in the plot. If `None` then use all dimensions from the search-space. sample_source : str or list of floats, default='random' @@ -609,9 +608,9 @@ def plot_evaluations(result, bins=20, dimensions=None, variables. `None` defaults to `space.dimensions[i].name`, or if also `None` to `['X_0', 'X_1', ..]`. - plot_dims : list(str), list(int), default=None - List of names or indices for search-space dimensions to be - used in the plot. + plot_dims : list of str and int, default=None + List of dimension names or dimension indices from the + search-space dimensions to be included in the plot. If `None` then use all dimensions from the search-space. Returns @@ -691,7 +690,7 @@ def _get_ylim_diagonal(ax): Returns ------- - ylim_diagonal : list(int) + ylim_diagonal : tuple(int) The common min and max ylim for the diagonal plots. 
""" @@ -711,10 +710,7 @@ def _get_ylim_diagonal(ax): ylim_min = np.min(ylim_lo) ylim_max = np.max(ylim_hi) - # The common ylim for the diagonal plots. - ylim_diagonal = [ylim_min, ylim_max] - - return ylim_diagonal + return ylim_min, ylim_max def partial_dependence_1D(space, model, i, samples, From 3b43f3ea8831097b2d1e6d032d193bb8d5fb8031 Mon Sep 17 00:00:00 2001 From: holgern Date: Fri, 28 Feb 2020 11:11:02 +0100 Subject: [PATCH 183/265] Add simple example to show 2D plots and fix plot_objective_2D --- examples/plots/partial-dependence-plot-2D.py | 105 +++++++++++++++++++ skopt/plots.py | 14 +-- 2 files changed, 112 insertions(+), 7 deletions(-) create mode 100644 examples/plots/partial-dependence-plot-2D.py diff --git a/examples/plots/partial-dependence-plot-2D.py b/examples/plots/partial-dependence-plot-2D.py new file mode 100644 index 000000000..bb03ba2c6 --- /dev/null +++ b/examples/plots/partial-dependence-plot-2D.py @@ -0,0 +1,105 @@ +""" +=========================== +Partial Dependence Plots 2D +=========================== + +Hvass-Labs Dec 2017 +Holger Nahrstaedt 2020 + +.. currentmodule:: skopt + +Simple example to show the new 2D plots. +""" +print(__doc__) +import numpy as np +from math import exp + +from skopt import gp_minimize +from skopt.space import Real, Categorical, Integer +from skopt.plots import plot_histogram, plot_objective_2D, plot_objective +from skopt.utils import point_asdict +np.random.seed(123) +import matplotlib.pyplot as plt +############################################################################# + +dim_learning_rate = Real(name='learning_rate', low=1e-6, high=1e-2, prior='log-uniform') +dim_num_dense_layers = Integer(name='num_dense_layers', low=1, high=5) +dim_num_dense_nodes = Integer(name='num_dense_nodes', low=5, high=512) +dim_activation = Categorical(name='activation', categories=['relu', 'sigmoid']) + +dimensions = [dim_learning_rate, + dim_num_dense_layers, + dim_num_dense_nodes, + dim_activation] + +default_parameters = [1e-4, 1, 64, 'relu'] + +def model_fitness(x): + learning_rate, num_dense_layers, num_dense_nodes, activation = x + + fitness = ((exp(learning_rate) - 1.0) * 1000) ** 2 + \ + (num_dense_layers) ** 2 + \ + (num_dense_nodes/100) ** 2 + + fitness *= 1.0 + 0.1 * np.random.rand() + + if activation == 'sigmoid': + fitness += 10 + + return fitness + +print(model_fitness(x=default_parameters)) + +############################################################################# + +search_result = gp_minimize(func=model_fitness, + dimensions=dimensions, + n_calls=30, + x0=default_parameters, + random_state=123 + ) + +print(search_result.x) +print(search_result.fun) + +############################################################################# + +for fitness, x in sorted(zip(search_result.func_vals, search_result.x_iters)): + print(fitness, x) + +############################################################################# + +space = search_result.space + +print(search_result.x_iters) + +search_space = {name: space[name][1] for name in space.dimension_names} + +print(point_asdict(search_space, default_parameters)) + +############################################################################# +print("Plotting now ...") + +_ = plot_histogram(result=search_result, dimension_identifier='learning_rate', + bins=20) +plt.show() + +############################################################################# +_ = plot_objective_2D(result=search_result, + dimension_identifier1='learning_rate', + dimension_identifier2='num_dense_nodes') 
+plt.show() + +############################################################################# + +_ = plot_objective_2D(result=search_result, + dimension_identifier1='num_dense_layers', + dimension_identifier2='num_dense_nodes') +plt.show() + +############################################################################# + +_ = plot_objective(result=search_result, + plot_dims=['num_dense_layers', + 'num_dense_nodes']) +plt.show() diff --git a/skopt/plots.py b/skopt/plots.py index 477e18a22..0358ea9c4 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -954,7 +954,7 @@ def plot_objective_2D(result, dimension_identifier1, dimension_identifier2, # Estimate the objective function for these sampled points # using the last fitted model for the search-space. - xi, yi, zi = partial_dependence_2D(space, last_model, index1, index2, + xi, yi, zi = partial_dependence_2D(space, last_model, index2, index1, samples, n_points=n_points) if ax is None: @@ -968,17 +968,18 @@ def plot_objective_2D(result, dimension_identifier1, dimension_identifier2, # Plot all the parameters that were sampled during optimization. # These are plotted as small black dots. - ax.scatter(samples2, samples1, c='black', s=10, linewidths=1) + ax.scatter(samples1, samples2, c='black', s=10, linewidths=1) # Plot the best parameters that were sampled during optimization. # These are plotted as a big red star. - ax.scatter(best_sample2, best_sample1, + ax.scatter(best_sample1, best_sample2, c='red', s=50, linewidths=1, marker='*') # Use the dimension-names as the labels for the plot-axes. - ax.set_xlabel(dimension2.name) - ax.set_ylabel(dimension1.name) - + ax.set_xlabel(dimension1.name) + ax.set_ylabel(dimension2.name) + ax.autoscale(enable=True, axis='x', tight=True) + ax.autoscale(enable=True, axis='y', tight=True) # Use log-scale on the x-axis? if dimension1.prior == 'log-uniform': ax.set_xscale('log') @@ -986,7 +987,6 @@ def plot_objective_2D(result, dimension_identifier1, dimension_identifier2, # Use log-scale on the y-axis? if dimension2.prior == 'log-uniform': ax.set_yscale('log') - return ax From ef63716b3ff511b262bbf2d785e6cad582680be9 Mon Sep 17 00:00:00 2001 From: holgern Date: Fri, 28 Feb 2020 11:33:04 +0100 Subject: [PATCH 184/265] Resolve conflict --- skopt/plots.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/skopt/plots.py b/skopt/plots.py index df6fb4b0c..87fc94708 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -6,7 +6,8 @@ from scipy.optimize import OptimizeResult from skopt import expected_minimum, expected_minimum_random_sampling -from .space import Categorical, Space +from .space import Categorical +from .space import Space # For plot tests, matplotlib must be set to headless mode early if 'pytest' in sys.modules: From 6679bec3a8ff4758d6013f26d75096f4f1f9230c Mon Sep 17 00:00:00 2001 From: holgern Date: Fri, 28 Feb 2020 12:13:25 +0100 Subject: [PATCH 185/265] Add whats_new entry * remove n_minimum_search cap --- doc/whats_new/v0.8.rst | 8 ++++++++ skopt/plots.py | 2 -- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/doc/whats_new/v0.8.rst b/doc/whats_new/v0.8.rst index 914a9ad01..2f813e353 100644 --- a/doc/whats_new/v0.8.rst +++ b/doc/whats_new/v0.8.rst @@ -15,6 +15,14 @@ Version 0.8.0 :pr:`884` by :user:`Holger Nahrstaedt ` based on :pr:`627` by :user:`JPN ` +:mod:`skopt.plots` +------------------ +- |Enhancement| Allow dimension selection for plot_objective + and plot_evaluations and add plot_histogram and plot_objective_2D. + Plot code has been refactored. 
+ :pr:`848` by :user:`Holger Nahrstaedt ` + based on :pr:`579` by :user:`Hvass-Labs ` + :mod:`skopt.sampler` -------------------- - |MajorFeature| Initial sampling generation diff --git a/skopt/plots.py b/skopt/plots.py index b191c39df..8ce8b916c 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -1357,8 +1357,6 @@ def _evaluate_min_params(result, params='result', # Use standard of 10^n_parameters. Note this # becomes very slow for many parameters n_minimum_search = 10 ** len(result.x) - if n_minimum_search > 100000: - n_minimum_search = 100000 x_vals, _ = expected_minimum_random_sampling( result, n_random_starts=n_minimum_search, From 839056e12a4ac3e5e2a41e0f19b183ad72b2c383 Mon Sep 17 00:00:00 2001 From: holgern Date: Fri, 28 Feb 2020 12:27:32 +0100 Subject: [PATCH 186/265] Resolve conflict --- skopt/plots.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/skopt/plots.py b/skopt/plots.py index 87fc94708..0178afac4 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -4,10 +4,9 @@ from itertools import count from functools import partial from scipy.optimize import OptimizeResult - +from .space import Space from skopt import expected_minimum, expected_minimum_random_sampling from .space import Categorical -from .space import Space # For plot tests, matplotlib must be set to headless mode early if 'pytest' in sys.modules: From 23894bee3c27e4d65752b531fe8b2e6581b635dd Mon Sep 17 00:00:00 2001 From: holgern Date: Fri, 28 Feb 2020 12:32:57 +0100 Subject: [PATCH 187/265] Revert changes on plot --- examples/sklearn-gridsearchcv-replacement.py | 9 ----- skopt/plots.py | 37 ++++++-------------- 2 files changed, 10 insertions(+), 36 deletions(-) diff --git a/examples/sklearn-gridsearchcv-replacement.py b/examples/sklearn-gridsearchcv-replacement.py index 5413390ff..2fd181bc9 100644 --- a/examples/sklearn-gridsearchcv-replacement.py +++ b/examples/sklearn-gridsearchcv-replacement.py @@ -35,8 +35,6 @@ """ print(__doc__) import numpy as np -np.random.seed(123) -import matplotlib.pyplot as plt ############################################################################# # Minimal example @@ -124,13 +122,6 @@ print("val. score: %s" % opt.best_score_) print("test score: %s" % opt.score(X_test, y_test)) -print("best params: %s" % str(opt.best_params_)) - -############################################################################# -from skopt.plots import plot_objective - -plot_objective(opt.optimizer_results_[0]) -plt.show() ############################################################################# # Progress monitoring and control using `callback` argument of `fit` method diff --git a/skopt/plots.py b/skopt/plots.py index 0178afac4..2879492d4 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -4,7 +4,7 @@ from itertools import count from functools import partial from scipy.optimize import OptimizeResult -from .space import Space + from skopt import expected_minimum, expected_minimum_random_sampling from .space import Categorical @@ -525,36 +525,22 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, raise ValueError("Valid values for zscale are 'linear' and 'log'," " not '%s'." 
% zscale) - n_dims = space.n_dims - space.n_constant_dimensions - dim_without_constant = [] - for dim in space.dimensions: - if dim.is_constant: - continue - dim_without_constant.append(dim) - space_without_constant = Space(dim_without_constant) - fig, ax = plt.subplots(n_dims, n_dims, - figsize=(size * n_dims, size * n_dims)) + fig, ax = plt.subplots(space.n_dims, space.n_dims, + figsize=(size * space.n_dims, size * space.n_dims)) fig.subplots_adjust(left=0.05, right=0.95, bottom=0.05, top=0.95, hspace=0.1, wspace=0.1) - ax_i = 0 - for i in range(space.n_dims): - if space.dimensions[i].is_constant: - continue - ax_j = 0 for j in range(space.n_dims): - if space.dimensions[j].is_constant: - continue if i == j: xi, yi = partial_dependence(space, result.models[-1], i, j=None, sample_points=rvs_transformed, n_points=n_points, x_eval=x_eval) - ax[ax_i, ax_i].plot(xi, yi) - ax[ax_i, ax_i].axvline(minimum[i], linestyle="--", color="r", lw=1) + ax[i, i].plot(xi, yi) + ax[i, i].axvline(minimum[i], linestyle="--", color="r", lw=1) # lower triangle elif i > j: @@ -562,17 +548,14 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, i, j, rvs_transformed, n_points, x_eval=x_eval) - if np.min(zi.shape) > 1: - ax[ax_i, ax_j].contourf(xi, yi, zi, levels, - locator=locator, cmap='viridis_r') - ax[ax_i, ax_j].scatter(samples[:, j], samples[:, i], + ax[i, j].contourf(xi, yi, zi, levels, + locator=locator, cmap='viridis_r') + ax[i, j].scatter(samples[:, j], samples[:, i], c='k', s=10, lw=0.) - ax[ax_i, ax_j].scatter(minimum[j], minimum[i], + ax[i, j].scatter(minimum[j], minimum[i], c=['r'], s=20, lw=0.) - ax_j += 1 - ax_i += 1 ylabel = "Partial dependence" - return _format_scatter_plot_axes(ax, space_without_constant, ylabel=ylabel, + return _format_scatter_plot_axes(ax, space, ylabel=ylabel, dim_labels=dimensions) From b398362198a828476f77c0282bfcb20f8e975cec Mon Sep 17 00:00:00 2001 From: holgern Date: Fri, 28 Feb 2020 12:38:44 +0100 Subject: [PATCH 188/265] Add unit test --- skopt/tests/test_space.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/skopt/tests/test_space.py b/skopt/tests/test_space.py index 0b09788d2..054ee2abf 100644 --- a/skopt/tests/test_space.py +++ b/skopt/tests/test_space.py @@ -366,6 +366,17 @@ def test_space_from_space(): assert_equal(space, space2) +@pytest.mark.fast_test +def test_constant_property(): + space = Space([(0.0, 1.0), (1,), + ("a", "b", "c"), (1.0, 5.0, "log-uniform"), ("e",)]) + assert space.n_constant_dimensions == 2 + for i in [1, 4]: + assert space.dimensions[i].is_constant + for i in [0, 2, 3]: + assert not space.dimensions[i].is_constant + + @pytest.mark.fast_test def test_set_get_transformer(): # can you pass a Space instance to the Space constructor? 
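A short, self-contained sketch of the constant-dimension behaviour exercised
by test_constant_property above (Space is skopt's own class; the concrete
search space here is invented for illustration):

    from skopt.space import Space

    space = Space([(0.0, 1.0),        # Real range        -> not constant
                   (1,),              # single category   -> constant
                   ("a", "b", "c")])  # three categories  -> not constant
    assert space.n_constant_dimensions == 1
    assert space.dimensions[1].is_constant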
From acf3d9a19a84c0cc9e0d5f8744f41ee19a491130 Mon Sep 17 00:00:00 2001 From: holgern Date: Fri, 28 Feb 2020 12:40:34 +0100 Subject: [PATCH 189/265] Add whats_new entry --- doc/whats_new/v0.8.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/whats_new/v0.8.rst b/doc/whats_new/v0.8.rst index 7d791b466..89f529c57 100644 --- a/doc/whats_new/v0.8.rst +++ b/doc/whats_new/v0.8.rst @@ -17,6 +17,12 @@ Version 0.8.0 - |Enhancement| Improve sampler and add grid sampler :pr:`851` by :user:`Holger Nahrstaedt ` +:mod:`skopt.space` +------------------ +- |Enhancement| Add `is_constant` property to dimension and + `n_constant_dimensions` property to Space + :pr:`883` by :user:`Holger Nahrstaedt ` + :mod:`skopt.utils` ------------------ - |Fix| Fix Optimizer for full categorical spaces From 7e6057d00644ec3006d942b155acd149b96d6e11 Mon Sep 17 00:00:00 2001 From: holgern Date: Fri, 28 Feb 2020 12:44:38 +0100 Subject: [PATCH 190/265] Improve pep8 --- examples/plots/partial-dependence-plot-with-categorical.py | 3 --- examples/plots/partial-dependence-plot.py | 2 -- 2 files changed, 5 deletions(-) diff --git a/examples/plots/partial-dependence-plot-with-categorical.py b/examples/plots/partial-dependence-plot-with-categorical.py index 733334823..bd7bcf100 100644 --- a/examples/plots/partial-dependence-plot-with-categorical.py +++ b/examples/plots/partial-dependence-plot-with-categorical.py @@ -94,6 +94,3 @@ def objective(params): _ = plot_objective(result, n_points=10, sample_source=[15, 4, 7, 15, 'b', 'entropy', 'e'], minimum=[15, 4, 7, 15, 'b', 'entropy', 'e']) - - - diff --git a/examples/plots/partial-dependence-plot.py b/examples/plots/partial-dependence-plot.py index f85e8e152..03f6409c8 100644 --- a/examples/plots/partial-dependence-plot.py +++ b/examples/plots/partial-dependence-plot.py @@ -19,7 +19,6 @@ np.random.seed(123) import matplotlib.pyplot as plt - ############################################################################# # Objective function # ================== @@ -33,7 +32,6 @@ def funny_func(x): s += (x[i] * i) ** 2 return s - ############################################################################# # Optimisation using decision trees # ================================= From d7a13a49ba4855b7cd57f57702f5631b49de7677 Mon Sep 17 00:00:00 2001 From: holgern Date: Fri, 28 Feb 2020 12:52:49 +0100 Subject: [PATCH 191/265] Fix doc warnings --- skopt/space/space.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/skopt/space/space.py b/skopt/space/space.py index fd1156e8f..7c4177719 100644 --- a/skopt/space/space.py +++ b/skopt/space/space.py @@ -206,6 +206,7 @@ class Real(Dimension): prior : "uniform" or "log-uniform", default="uniform" Distribution to use when sampling random points for this dimension. + - If `"uniform"`, points are sampled uniformly between the lower and upper bounds. - If `"log-uniform"`, points are sampled uniformly between @@ -218,7 +219,6 @@ class Real(Dimension): transform : "identity", "normalize", optional The following transformations are supported. - - "identity", (default) the transformed space is the same as the original space. - "normalize", the transformed space is scaled to be between @@ -230,6 +230,7 @@ class Real(Dimension): dtype : str or dtype, default=np.float float type which will be used in inverse_transform, can be float. 
+ """ def __init__(self, low, high, prior="uniform", base=10, transform=None, name=None, dtype=np.float): @@ -383,6 +384,7 @@ class Integer(Dimension): prior : "uniform" or "log-uniform", default="uniform" Distribution to use when sampling random integers for this dimension. + - If `"uniform"`, intgers are sampled uniformly between the lower and upper bounds. - If `"log-uniform"`, intgers are sampled uniformly between @@ -391,6 +393,7 @@ class Integer(Dimension): base : int The logarithmic base to use for a log-uniform prior. + - Default 10, otherwise commonly 2. transform : "identity", "normalize", optional @@ -409,6 +412,7 @@ class Integer(Dimension): can be int, np.int16, np.uint32, np.int32, np.int64 (default). When set to int, `inverse_transform` returns a list instead of a numpy array + """ def __init__(self, low, high, prior="uniform", base=10, transform=None, name=None, dtype=np.int64): @@ -560,7 +564,7 @@ class Categorical(Dimension): transform : "onehot", "string", "identity", "label", default="onehot" - "identity", the transformed space is the same as the original space. - - "string", the transformed space is a string encoded + - "string", the transformed space is a string encoded representation of the original space. - "label", the transformed space is a label encoded representation (integer) of the original space. @@ -569,6 +573,7 @@ class Categorical(Dimension): name : str or None Name associated with dimension, e.g., "colors". + """ def __init__(self, categories, prior=None, transform=None, name=None): self.categories = tuple(categories) @@ -781,12 +786,13 @@ def from_yaml(cls, yml_path, namespace=None): prior: log-uniform namespace : str, default=None Namespace within configuration file to use, will use first - namespace if not provided + namespace if not provided Returns ------- space : Space Instantiated Space object + """ with open(yml_path, 'rb') as f: config = yaml.safe_load(f) From 917cd65780b9c8af9f63fee766ae360ee0b9940b Mon Sep 17 00:00:00 2001 From: holgern Date: Fri, 28 Feb 2020 13:01:06 +0100 Subject: [PATCH 192/265] Fix doc warnings --- skopt/space/space.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/skopt/space/space.py b/skopt/space/space.py index 7c4177719..dafb636e5 100644 --- a/skopt/space/space.py +++ b/skopt/space/space.py @@ -219,6 +219,7 @@ class Real(Dimension): transform : "identity", "normalize", optional The following transformations are supported. + - "identity", (default) the transformed space is the same as the original space. 
- "normalize", the transformed space is scaled to be between @@ -773,17 +774,19 @@ def from_yaml(cls, yml_path, namespace=None): yml_path : str Full path to yaml configuration file, example YaML below: Space: - - Integer: - low: -5 - high: 5 - - Categorical: - categories: - - a - - b - - Real: - low: 1.0 - high: 5.0 - prior: log-uniform + + - Integer: + low: -5 + high: 5 + - Categorical: + categories: + - a + - b + - Real: + low: 1.0 + high: 5.0 + prior: log-uniform + namespace : str, default=None Namespace within configuration file to use, will use first namespace if not provided From c01cce747cbd9b5145300f2a65c9ae6a6b4eb55a Mon Sep 17 00:00:00 2001 From: holgern Date: Fri, 28 Feb 2020 13:51:59 +0100 Subject: [PATCH 193/265] Skip constant dimensions for plot_objective and plot_evaluations * add plot to BayesSearchCV example * Allow to plot using BayesSearchCV * fix issue #566 --- examples/sklearn-gridsearchcv-replacement.py | 16 ++++++++++++++++ skopt/plots.py | 10 ++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/examples/sklearn-gridsearchcv-replacement.py b/examples/sklearn-gridsearchcv-replacement.py index 2fd181bc9..791eb663a 100644 --- a/examples/sklearn-gridsearchcv-replacement.py +++ b/examples/sklearn-gridsearchcv-replacement.py @@ -35,6 +35,8 @@ """ print(__doc__) import numpy as np +np.random.seed(123) +import matplotlib.pyplot as plt ############################################################################# # Minimal example @@ -79,6 +81,7 @@ from skopt import BayesSearchCV from skopt.space import Real, Categorical, Integer +from skopt.plots import plot_objective from sklearn.datasets import load_digits from sklearn.svm import LinearSVC, SVC @@ -122,6 +125,19 @@ print("val. score: %s" % opt.best_score_) print("test score: %s" % opt.score(X_test, y_test)) +print("best params: %s" % str(opt.best_params_)) + +############################################################################# +# Partial Dependence plot of the objective function for SVC + +plot_objective(opt.optimizer_results_[0], dimensions=["C", "gamma", "degree", "kernel"]) +plt.show() + +############################################################################# +# Partial Dependence plot of the objective function for LinearSVC + +plot_objective(opt.optimizer_results_[1], dimensions=["C"]) +plt.show() ############################################################################# # Progress monitoring and control using `callback` argument of `fit` method diff --git a/skopt/plots.py b/skopt/plots.py index 8ce8b916c..49e8b398a 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -609,7 +609,8 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, plot_dims : list of str and int, default=None List of dimension names or dimension indices from the search-space dimensions to be included in the plot. - If `None` then use all dimensions from the search-space. + If `None` then use all dimensions except constant ones + from the search-space. sample_source : str or list of floats, default='random' Defines to samples generation to use for averaging the model function @@ -672,6 +673,8 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, # Get all dimensions. 
plot_dims = [] for row in range(space.n_dims): + if space.dimensions[row].is_constant: + continue plot_dims.append((row, space.dimensions[row])) else: plot_dims = space[plot_dims] @@ -771,7 +774,8 @@ def plot_evaluations(result, bins=20, dimensions=None, plot_dims : list of str and int, default=None List of dimension names or dimension indices from the search-space dimensions to be included in the plot. - If `None` then use all dimensions from the search-space. + If `None` then use all dimensions except constant ones + from the search-space. Returns ------- @@ -790,6 +794,8 @@ def plot_evaluations(result, bins=20, dimensions=None, # Get all dimensions. plot_dims = [] for row in range(space.n_dims): + if space.dimensions[row].is_constant: + continue plot_dims.append((row, space.dimensions[row])) else: plot_dims = space[plot_dims] From ce0b39b20ff58e84a201db082a3235f6e9d5064c Mon Sep 17 00:00:00 2001 From: holgern Date: Fri, 28 Feb 2020 14:00:50 +0100 Subject: [PATCH 194/265] Fix pep8 and add entry in whats_new --- doc/conf.py | 3 ++- doc/templates/index.html | 1 + doc/whats_new/v0.8.rst | 4 ++++ examples/sklearn-gridsearchcv-replacement.py | 3 ++- 4 files changed, 9 insertions(+), 2 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 0c69085df..cb35b4318 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -308,7 +308,8 @@ def __call__(self, directory): # thumbnails for the front page of the scikit-learn home page. # key: first image in set # values: (number of plot in set, height of thumbnail) -carousel_thumbs = {'sphx_glr_plot_ask-and-tell_002.png': 600, +carousel_thumbs = {'sphx_glr_sklearn-gridsearchcv-replacement_001.png': 600, + 'sphx_glr_plot_ask-and-tell_002.png': 600, 'sphx_glr_bayesian-optimization_004.png': 600, 'sphx_glr_strategy-comparison_002.png': 600, 'sphx_glr_visualizing-results_008.png': 600} diff --git a/doc/templates/index.html b/doc/templates/index.html index 98a369032..55649453c 100644 --- a/doc/templates/index.html +++ b/doc/templates/index.html @@ -35,6 +35,7 @@

    [index.html hunk body lost in extraction; only the fragment
     "Sequential model-bas..." of the added carousel entry survives]

diff --git a/doc/whats_new/v0.8.rst b/doc/whats_new/v0.8.rst
index a6b8d579b..d2407a7fc 100644
--- a/doc/whats_new/v0.8.rst
+++ b/doc/whats_new/v0.8.rst
@@ -37,6 +37,10 @@ Version 0.8.0
 - |Enhancement| Add `is_constant` property to dimension and
   `n_constant_dimensions` property to Space
   :pr:`883` by :user:`Holger Nahrstaedt `
+- |Enhancement| Skip constant dimensions for plot_objective and
+  plot_evaluations
+  to allow plots using BayesSearchCV
+  :pr:`888` by :user:`Holger Nahrstaedt `

 :mod:`skopt.utils`
 ------------------

diff --git a/examples/sklearn-gridsearchcv-replacement.py b/examples/sklearn-gridsearchcv-replacement.py
index 791eb663a..2cd08015b 100644
--- a/examples/sklearn-gridsearchcv-replacement.py
+++ b/examples/sklearn-gridsearchcv-replacement.py
@@ -130,7 +130,8 @@
 #############################################################################
 # Partial Dependence plot of the objective function for SVC

-plot_objective(opt.optimizer_results_[0], dimensions=["C", "gamma", "degree", "kernel"])
+plot_objective(opt.optimizer_results_[0],
+               dimensions=["C", "gamma", "degree", "kernel"])
 plt.show()

 #############################################################################

From 4ebb5a67353d196fa75d3504f845bbcddf4347a1 Mon Sep 17 00:00:00 2001
From: holgern
Date: Fri, 28 Feb 2020 14:51:01 +0100
Subject: [PATCH 195/265] Cleanup doc and add names to spaces for BayesSearchCV

Improve example
---
 doc/templates/index.html                     |  2 +-
 examples/sklearn-gridsearchcv-replacement.py | 16 +++----
 skopt/plots.py                               | 45 ++++++++++----------
 skopt/searchcv.py                            |  5 +++
 4 files changed, 37 insertions(+), 31 deletions(-)

diff --git a/doc/templates/index.html b/doc/templates/index.html
index 55649453c..b3f0736fa 100644
--- a/doc/templates/index.html
+++ b/doc/templates/index.html
@@ -35,7 +35,7 @@

    [index.html hunk body lost in extraction; only the fragment
     "Sequential model-bas..." of the changed carousel entry survives]

diff --git a/examples/sklearn-gridsearchcv-replacement.py b/examples/sklearn-gridsearchcv-replacement.py
index 2cd08015b..1d13815b2 100644
--- a/examples/sklearn-gridsearchcv-replacement.py
+++ b/examples/sklearn-gridsearchcv-replacement.py
@@ -81,7 +81,7 @@

 from skopt import BayesSearchCV
 from skopt.space import Real, Categorical, Integer
-from skopt.plots import plot_objective
+from skopt.plots import plot_objective, plot_histogram

 from sklearn.datasets import load_digits
 from sklearn.svm import LinearSVC, SVC
@@ -117,7 +117,7 @@

 opt = BayesSearchCV(
     pipe,
-    [(svc_search, 20), (linsvc_search, 16)], # (parameter space, # of evaluations)
+    [(svc_search, 40), (linsvc_search, 16)], # (parameter space, # of evaluations)
     cv=3
 )

@@ -129,15 +129,16 @@

 #############################################################################
 # Partial Dependence plot of the objective function for SVC
-
+#
 plot_objective(opt.optimizer_results_[0],
-               dimensions=["C", "gamma", "degree", "kernel"])
+               dimensions=["C", "degree", "gamma", "kernel"],
+               n_minimum_search=int(1e8))
 plt.show()

 #############################################################################
-# Partial Dependence plot of the objective function for LinearSVC
-
-plot_objective(opt.optimizer_results_[1], dimensions=["C"])
+# Plot of the histogram for LinearSVC
+#
+plot_histogram(opt.optimizer_results_[1], 1)
 plt.show()

 #############################################################################
@@ -170,7 +171,6 @@
     cv=3
 )

-
 # callback handler
 def on_step(optim_result):
     score = searchcv.best_score_
diff --git a/skopt/plots.py b/skopt/plots.py
index 49e8b398a..8f7c30416 100644
--- a/skopt/plots.py
+++ b/skopt/plots.py
@@ -168,8 +168,10 @@ def plot_gaussian_process(res, **kwargs):
     if ax is None:
         ax = plt.gca()
-    assert res.space.n_dims == 1, "Space dimension must be 1"
-    x, x_model = _evenly_sample(res.space.dimensions[0], n_points)
+    n_dims = res.space.n_dims
+    assert n_dims == 1, "Space dimension must be 1"
+    dimension = res.space.dimensions[0]
+    x, x_model = _evenly_sample(dimension, n_points)
     x = x.reshape(-1, 1)
     x_model = x_model.reshape(-1, 1)
     if res.specs is not None and "args" in res.specs:
@@ -624,31 +626,26 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2,

         Valid strings:

-        - 'random' - `n_samples` random samples will used
-
-        - 'result' - Use only the best observed parameters
-
-        - 'expected_minimum' - Parameters that gives the best
-              minimum Calculated using scipy's minimize method.
-              This method currently does not work with categorical values.
-
-        - 'expected_minimum_random' - Parameters that gives the
-              best minimum when using naive random sampling.
-              Works with categorical values.
+        - 'random' - `n_samples` random samples will used
+        - 'result' - Use only the best observed parameters
+        - 'expected_minimum' - Parameters that gives the best
+          minimum Calculated using scipy's minimize method.
+          This method currently does not work with categorical values.
+        - 'expected_minimum_random' - Parameters that gives the
+          best minimum when using naive random sampling.
+          Works with categorical values

     minimum : str or list of floats, default = 'result'
         Defines the values for the red points in the plots.
         Valid strings:

-        - 'result' - Use best observed parameters
-
-        - 'expected_minimum' - Parameters that gives the best
-              minimum Calculated using scipy's minimize method.
-              This method currently does not work with categorical values.
- - - 'expected_minimum_random' - Parameters that gives the - best minimum when using naive random sampling. - Works with categorical values + - 'result' - Use best observed parameters + - 'expected_minimum' - Parameters that gives the best + minimum Calculated using scipy's minimize method. + This method currently does not work with categorical values. + - 'expected_minimum_random' - Parameters that gives the + best minimum when using naive random sampling. + Works with categorical values n_minimum_search : int, default = None Determines how many points should be evaluated @@ -661,6 +658,7 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, ------- ax : `Matplotlib.Axes` A 2-d matrix of Axes-objects with the sub-plots. + """ # Here we define the values for which to plot the red dot (2d plot) and # the red dotted line (1d plot). @@ -781,6 +779,7 @@ def plot_evaluations(result, bins=20, dimensions=None, ------- ax : `Matplotlib.Axes` A 2-d matrix of Axes-objects with the sub-plots. + """ space = result.space # Convert categoricals to integers, so we can ensure consistent ordering. @@ -858,6 +857,7 @@ def _get_ylim_diagonal(ax): ------- ylim_diagonal : tuple(int) The common min and max ylim for the diagonal plots. + """ # Number of search-space dimensions used in this plot. @@ -919,6 +919,7 @@ def partial_dependence_1D(space, model, i, samples, yi : np.array The average value of the modelled objective function at each point `xi`. + """ # The idea is to step through one dimension, evaluating the model with # that dimension fixed and averaging either over random values or over diff --git a/skopt/searchcv.py b/skopt/searchcv.py index 04ebc5454..13f4bafe9 100644 --- a/skopt/searchcv.py +++ b/skopt/searchcv.py @@ -549,6 +549,11 @@ def _make_optimizer(self, params_space): kwargs = self.optimizer_kwargs_.copy() kwargs['dimensions'] = dimensions_aslist(params_space) optimizer = Optimizer(**kwargs) + for i in range(len(optimizer.space.dimensions)): + if optimizer.space.dimensions[i].name is not None: + continue + optimizer.space.dimensions[i].name = list(sorted( + params_space.keys()))[i] return optimizer From adab0677a3035ac199918cb9d9d2857bff03150f Mon Sep 17 00:00:00 2001 From: holgern Date: Fri, 28 Feb 2020 14:54:08 +0100 Subject: [PATCH 196/265] Fix lint --- examples/sklearn-gridsearchcv-replacement.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/sklearn-gridsearchcv-replacement.py b/examples/sklearn-gridsearchcv-replacement.py index 1d13815b2..2fa6b5f43 100644 --- a/examples/sklearn-gridsearchcv-replacement.py +++ b/examples/sklearn-gridsearchcv-replacement.py @@ -117,7 +117,8 @@ opt = BayesSearchCV( pipe, - [(svc_search, 40), (linsvc_search, 16)], # (parameter space, # of evaluations) + # (parameter space, # of evaluations) + [(svc_search, 40), (linsvc_search, 16)], cv=3 ) From b517005dcd3b9739f915cf164a7be8430068579f Mon Sep 17 00:00:00 2001 From: holgern Date: Fri, 28 Feb 2020 16:29:25 +0100 Subject: [PATCH 197/265] Fix typo in doc --- examples/sklearn-gridsearchcv-replacement.py | 8 ++++---- skopt/space/space.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/sklearn-gridsearchcv-replacement.py b/examples/sklearn-gridsearchcv-replacement.py index 2fa6b5f43..52a75a334 100644 --- a/examples/sklearn-gridsearchcv-replacement.py +++ b/examples/sklearn-gridsearchcv-replacement.py @@ -131,15 +131,15 @@ ############################################################################# # Partial Dependence plot of 
the objective function for SVC # -plot_objective(opt.optimizer_results_[0], - dimensions=["C", "degree", "gamma", "kernel"], - n_minimum_search=int(1e8)) +_ = plot_objective(opt.optimizer_results_[0], + dimensions=["C", "degree", "gamma", "kernel"], + n_minimum_search=int(1e8)) plt.show() ############################################################################# # Plot of the histogram for LinearSVC # -plot_histogram(opt.optimizer_results_[1], 1) +_ = plot_histogram(opt.optimizer_results_[1], 1) plt.show() ############################################################################# diff --git a/skopt/space/space.py b/skopt/space/space.py index a63938987..af34f1d1c 100644 --- a/skopt/space/space.py +++ b/skopt/space/space.py @@ -386,9 +386,9 @@ class Integer(Dimension): Distribution to use when sampling random integers for this dimension. - - If `"uniform"`, intgers are sampled uniformly between the lower + - If `"uniform"`, integers are sampled uniformly between the lower and upper bounds. - - If `"log-uniform"`, intgers are sampled uniformly between + - If `"log-uniform"`, integers are sampled uniformly between `log(lower, base)` and `log(upper, base)` where log has base `base`. From 85bfd98760232570285644c1e86ef4d63fd270d7 Mon Sep 17 00:00:00 2001 From: WalzDS Date: Thu, 5 Mar 2020 10:12:49 +0100 Subject: [PATCH 198/265] remove outdated version number & fix links to milestones page --- README.rst | 5 ++--- doc/development.rst | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index ab80c31da..bd0b6da01 100644 --- a/README.rst +++ b/README.rst @@ -37,8 +37,7 @@ Important links Install ------- -The latest released version of scikit-optimize is v0.7.2, which you can install -with: +You can install the latest release with: :: pip install scikit-optimize @@ -105,7 +104,7 @@ Development The library is still experimental and under heavy development. Checkout the `next -milestone `__ +milestone `__ for the plans for the next release or look at some `easy issues `__ to get started contributing. diff --git a/doc/development.rst b/doc/development.rst index 59bde45fa..d756fdbea 100644 --- a/doc/development.rst +++ b/doc/development.rst @@ -4,7 +4,7 @@ Development The library is still experimental and under heavy development. Checkout the `next -milestone `__ +milestone `__ for the plans for the next release or look at some `easy issues `__ to get started contributing. From f375f0182d79cea9029ce97ef963737e205a3527 Mon Sep 17 00:00:00 2001 From: holgern Date: Fri, 6 Mar 2020 20:52:03 +0100 Subject: [PATCH 199/265] Add link to development --- doc/contents.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/contents.rst b/doc/contents.rst index 837fe4345..9ee878ddd 100644 --- a/doc/contents.rst +++ b/doc/contents.rst @@ -19,3 +19,4 @@ Table Of Contents user_guide auto_examples/index modules/classes + development From 8f41dc996bebd8d1b1563954b128d3e8ed3a2c24 Mon Sep 17 00:00:00 2001 From: Thibaut Lienart Date: Wed, 18 Mar 2020 12:11:42 +0100 Subject: [PATCH 200/265] Update bayesian-optimization.py --- examples/bayesian-optimization.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/bayesian-optimization.py b/examples/bayesian-optimization.py index 412c89e15..5d6d50a08 100644 --- a/examples/bayesian-optimization.py +++ b/examples/bayesian-optimization.py @@ -38,9 +38,9 @@ probabilistic model for the objective :math:`f`. Integrate out all possible true functions, using Gaussian process regression. -2. 
optimize a cheap acquisition/utility function $u$ based on the posterior +2. optimize a cheap acquisition/utility function :math:`u` based on the posterior distribution for sampling the next point. - .. math::`x_{t+1} = arg \\min_x u(x)` + :math:`x_{t+1} = arg \\min_x u(x)` Exploit uncertainty to balance exploration against exploitation. 3. Sample the next observation :math:`y_{t+1}` at :math:`x_{t+1}`. From 0f7bfda5d11a0be657942a30d644193f218df4df Mon Sep 17 00:00:00 2001 From: Thibaut Lienart Date: Wed, 18 Mar 2020 12:18:31 +0100 Subject: [PATCH 201/265] make pep8 happy --- examples/bayesian-optimization.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/bayesian-optimization.py b/examples/bayesian-optimization.py index 5d6d50a08..757c4d678 100644 --- a/examples/bayesian-optimization.py +++ b/examples/bayesian-optimization.py @@ -38,8 +38,8 @@ probabilistic model for the objective :math:`f`. Integrate out all possible true functions, using Gaussian process regression. -2. optimize a cheap acquisition/utility function :math:`u` based on the posterior - distribution for sampling the next point. +2. optimize a cheap acquisition/utility function :math:`u` based on the + posterior distribution for sampling the next point. :math:`x_{t+1} = arg \\min_x u(x)` Exploit uncertainty to balance exploration against exploitation. From e8464351ce7ae56354c2078565fd34bba2d46842 Mon Sep 17 00:00:00 2001 From: avelichk Date: Wed, 25 Mar 2020 19:26:38 +0000 Subject: [PATCH 202/265] acquisition fix --- skopt/optimizer/gp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skopt/optimizer/gp.py b/skopt/optimizer/gp.py index e91726026..83aebb731 100644 --- a/skopt/optimizer/gp.py +++ b/skopt/optimizer/gp.py @@ -135,7 +135,7 @@ def gp_minimize(func, dimensions, base_estimator=None, that of `"EIps"` acq_optimizer : string, `"sampling"` or `"lbfgs"`, default: `"lbfgs"` - Method to minimize the acquistion function. The fit model + Method to minimize the acquisition function. The fit model is updated with the optimal value obtained by optimizing `acq_func` with `acq_optimizer`. 
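
The loop described in the example text above (fit a Gaussian process to the
objective, then minimize the acquisition function u(x) to choose the next
point) maps directly onto gp_minimize's `acq_func` and `acq_optimizer`
arguments. A minimal sketch; the quadratic objective is illustrative only:

    from skopt import gp_minimize

    # "EI" is the acquisition/utility function u(x); "lbfgs" is the
    # method used to minimize it when proposing the next point.
    res = gp_minimize(lambda x: (x[0] - 0.3) ** 2,  # toy objective
                      [(-2.0, 2.0)],                # search space
                      acq_func="EI",
                      acq_optimizer="lbfgs",
                      n_calls=15, random_state=0)
    print(res.x, res.fun)
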
From 7e9bbb6ed7243925686f476cdaabcade01a43c2c Mon Sep 17 00:00:00 2001 From: Kartik Ayyer Date: Sun, 19 Apr 2020 07:00:00 +0200 Subject: [PATCH 203/265] Fix position of minimum line in plot_objective() Problem occurred when plot_dims is not the first N dimensions --- skopt/plots.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skopt/plots.py b/skopt/plots.py index 8f7c30416..9314966e8 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -717,7 +717,7 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, else: ax_ = ax ax_.plot(xi, yi) - ax_.axvline(minimum[i], linestyle="--", color="r", lw=1) + ax_.axvline(minimum[index], linestyle="--", color="r", lw=1) # lower triangle elif i > j: From 8dacf4dfa5614ca41b9578ecbca4a4bf116488c0 Mon Sep 17 00:00:00 2001 From: Kartik Ayyer Date: Sun, 19 Apr 2020 07:18:44 +0200 Subject: [PATCH 204/265] Add show_points and cmap options to plot_objective `show_points` allows user to decide whether to see evaluated points `cmap` allows user to choose color map for contour plot --- skopt/plots.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/skopt/plots.py b/skopt/plots.py index 9314966e8..5a76c1848 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -537,7 +537,8 @@ def partial_dependence(space, model, i, j=None, sample_points=None, def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, zscale='linear', dimensions=None, sample_source='random', - minimum='result', n_minimum_search=None, plot_dims=None): + minimum='result', n_minimum_search=None, plot_dims=None, + show_points=True, cmap='viridis_r'): """Plot a 2-d matrix with so-called Partial Dependence plots of the objective function. This shows the influence of each search-space dimension on the objective function. @@ -586,7 +587,7 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, levels : int, default=10 Number of levels to draw on the contour plot, passed directly - to `plt.contour()`. + to `plt.contourf()`. n_points : int, default=40 Number of points at which to evaluate the partial dependence @@ -654,6 +655,14 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, `sample_source` and/or `minimum` is set to 'expected_minimum' or 'expected_minimum_random'. + show_points: bool, default = True + Choose whether to show evaluated points in the + contour plots. + + cmap: str or Colormap, default = 'viridis_r' + Color map for contour plots. Passed directly to + `plt.contourf()` + Returns ------- ax : `Matplotlib.Axes` @@ -728,9 +737,10 @@ def plot_objective(result, levels=10, n_points=40, n_samples=250, size=2, index1, index2, samples, n_points) ax_.contourf(xi, yi, zi, levels, - locator=locator, cmap='viridis_r') - ax_.scatter(x_samples[:, index2], x_samples[:, index1], - c='k', s=10, lw=0.) + locator=locator, cmap=cmap) + if show_points: + ax_.scatter(x_samples[:, index2], x_samples[:, index1], + c='k', s=10, lw=0.) 
            ax_.scatter(minimum[index2], minimum[index1],
                        c=['r'], s=100, lw=0., marker='*')
     ylabel = "Partial dependence"

From 5825c51f63d4f3634da88eeff82650ba1d33cd80 Mon Sep 17 00:00:00 2001
From: Kartik Ayyer
Date: Sun, 19 Apr 2020 07:29:33 +0200
Subject: [PATCH 205/265] Fix xlim of diagonals in plot_objective

---
 skopt/plots.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/skopt/plots.py b/skopt/plots.py
index 5a76c1848..66af014b7 100644
--- a/skopt/plots.py
+++ b/skopt/plots.py
@@ -429,6 +429,8 @@ def _format_scatter_plot_axes(ax, space, ylabel, plot_dims,
         else:
             # diagonal plots
             ax_.set_ylim(*diagonal_ylim)
+            low, high = dim_i.bounds
+            ax_.set_xlim(low, high)
             ax_.yaxis.tick_right()
             ax_.yaxis.set_label_position('right')
             ax_.yaxis.set_ticks_position('both')

From 0d9c4f737353469d4bfe8b3880cf1146f012f06b Mon Sep 17 00:00:00 2001
From: Kartik Ayyer
Date: Sun, 19 Apr 2020 08:04:48 +0200
Subject: [PATCH 206/265] Fix setting xlim with categorical spaces

---
 skopt/plots.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/skopt/plots.py b/skopt/plots.py
index 66af014b7..f962cd780 100644
--- a/skopt/plots.py
+++ b/skopt/plots.py
@@ -429,8 +429,9 @@ def _format_scatter_plot_axes(ax, space, ylabel, plot_dims,
         else:
             # diagonal plots
             ax_.set_ylim(*diagonal_ylim)
-            low, high = dim_i.bounds
-            ax_.set_xlim(low, high)
+            if not iscat[i]:
+                low, high = dim_i.bounds
+                ax_.set_xlim(low, high)
             ax_.yaxis.tick_right()
             ax_.yaxis.set_label_position('right')
             ax_.yaxis.set_ticks_position('both')

From ce43c604d4937bf7493fce681c25b9f92ec80517 Mon Sep 17 00:00:00 2001
From: Holger Nahrstaedt
Date: Mon, 18 May 2020 11:14:30 +0200
Subject: [PATCH 207/265] Update minimum requirements

Change minimum requirements so that they are compatible with sklearn 0.23
---
 .circleci/config.yml |  2 +-
 .travis.yml          | 22 +++++++++++-----------
 README.rst           |  9 +++++++++
 requirements.txt     |  6 +++---
 skopt/searchcv.py    |  1 +
 5 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 63d7077fe..3b307cc50 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -11,7 +11,7 @@ jobs:
       - CONDA_ENV_NAME: testenv
       - PYTHON_VERSION: 3.6
       - NUMPY_VERSION: 1.13.3
-      - SKLEARN_VERSION: 0.19.1
+      - SKLEARN_VERSION: 0.20.4
       - SCIPY_VERSION: 0.19.1
       - MATPLOTLIB_VERSION: 2.1.1
         # on conda, this is the latest for python 3.5
diff --git a/.travis.yml b/.travis.yml
index bd7dded86..d23c84c19 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -19,29 +19,29 @@ matrix:
     # Linux environment to test scikit-learn against numpy and scipy master
     # installed from their CI wheels in a virtualenv with the Python
    # interpreter provided by travis.
- - name: "Python 3.5 - scikit 0.19.2" + - name: "Python 3.6 - scikit 0.20.4" python: "3.7" env: DISTRIB="conda" PYTHON_VERSION="3.5" - NUMPY_VERSION="1.12.0" SCIPY_VERSION="0.18.0" PYAML_VERSION="16.9.0" - SCIKIT_LEARN_VERSION="0.19.2" MATPLOTLIB_VERSION="2.0.0" COVERAGE="false" + NUMPY_VERSION="1.13.3" SCIPY_VERSION="0.19.1" PYAML_VERSION="16.9.0" + SCIKIT_LEARN_VERSION="0.20.4" MATPLOTLIB_VERSION="2.0.0" COVERAGE="false" JOBLIB_VERSION="0.11" - - name: "Python 3.6 - scikit 0.20.4" + - name: "Python 3.6 - scikit 0.21.3" python: "3.7" env: DISTRIB="conda" PYTHON_VERSION="3.6" - NUMPY_VERSION="1.14.0" SCIPY_VERSION="0.19.1" PYAML_VERSION="16.12.0" - SCIKIT_LEARN_VERSION="0.20.4" MATPLOTLIB_VERSION="2.0.0" COVERAGE="false" + NUMPY_VERSION="1.14.0" SCIPY_VERSION="1.0.0" PYAML_VERSION="16.12.0" + SCIKIT_LEARN_VERSION="0.21.3" MATPLOTLIB_VERSION="2.0.0" COVERAGE="false" JOBLIB_VERSION="0.11" - - name: "Python 3.7 - scikit 0.21.3" + - name: "Python 3.7 - scikit 0.22.1" python: "3.7" env: DISTRIB="conda" PYTHON_VERSION="3.7" - NUMPY_VERSION="1.16.0" SCIPY_VERSION="1.0.0" PYAML_VERSION="17.8.0" - SCIKIT_LEARN_VERSION="0.21.3" MATPLOTLIB_VERSION="*" COVERAGE="true" + NUMPY_VERSION="1.16.0" SCIPY_VERSION="1.2.0" PYAML_VERSION="17.8.0" + SCIKIT_LEARN_VERSION="0.22.1" MATPLOTLIB_VERSION="*" COVERAGE="true" JOBLIB_VERSION="0.13" - - name: "Python 3.8 - scikit 0.22.1" + - name: "Python 3.8 - scikit 0.23.0" python: "3.7" env: DISTRIB="conda" PYTHON_VERSION="3.8.1" COVERAGE="false" NUMPY_VERSION="1.18.1" SCIPY_VERSION="1.4.1" PYAML_VERSION="19.12.0" - SCIKIT_LEARN_VERSION="0.22.1" JOBLIB_VERSION="0.14.1" + SCIKIT_LEARN_VERSION="0.23.0" JOBLIB_VERSION="0.14.1" - name: "Python 3.7 - sdist check" python: "3.7" env: DISTRIB="conda" PYTHON_VERSION="3.7" diff --git a/README.rst b/README.rst index bd0b6da01..9ba472283 100644 --- a/README.rst +++ b/README.rst @@ -37,6 +37,15 @@ Important links Install ------- +scikit-optimize requires + +* Python >= 3.6 +* NumPy (>= 1.13.3) +* SciPy (>= 0.19.1) +* joblib (>= 0.11) +* scikit-learn >= 0.20 +* matplotlib >= 2.0.0 + You can install the latest release with: :: diff --git a/requirements.txt b/requirements.txt index 5f2ef0417..1eaa3083a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ -numpy>=1.12 -scipy>=0.18 -scikit-learn>=0.19 +numpy>=1.13.3 +scipy>=0.19.1 +scikit-learn>=0.20 matplotlib>=2.0.0 pytest pyaml>=16.9 diff --git a/skopt/searchcv.py b/skopt/searchcv.py index 13f4bafe9..765eea18c 100644 --- a/skopt/searchcv.py +++ b/skopt/searchcv.py @@ -14,6 +14,7 @@ from sklearn.model_selection._search import BaseSearchCV from sklearn.utils import check_random_state from sklearn.utils.fixes import MaskedArray + from sklearn.utils.validation import indexable, check_is_fitted try: from sklearn.metrics import check_scoring From c2d5a5c1e90443fd972ecd7dc1c1b696ac7581d6 Mon Sep 17 00:00:00 2001 From: Holger Nahrstaedt Date: Mon, 18 May 2020 11:34:28 +0200 Subject: [PATCH 208/265] Update setup.py --- setup.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index 3b311924e..8879da880 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,6 @@ 'Operating System :: POSIX', 'Operating System :: Unix', 'Operating System :: MacOS', - 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8'] @@ -43,9 +42,9 @@ classifiers=CLASSIFIERS, packages=['skopt', 'skopt.learning', 'skopt.optimizer', 'skopt.space', 
'skopt.learning.gaussian_process', 'skopt.sampler'], - install_requires=['joblib>=0.11', 'pyaml>=16.9', 'numpy>=1.12.0', - 'scipy>=0.18.0', - 'scikit-learn>=0.19.1'], + install_requires=['joblib>=0.11', 'pyaml>=16.9', 'numpy>=1.13.3', + 'scipy>=0.19.1', + 'scikit-learn>=0.20.0'], extras_require={ 'plots': ["matplotlib>=2.0.0"] } From 831f6356b473bbd9d57b1b9963416968374fb3c5 Mon Sep 17 00:00:00 2001 From: Holger Nahrstaedt Date: Mon, 18 May 2020 11:49:57 +0200 Subject: [PATCH 209/265] Update doc --- doc/install.rst | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/doc/install.rst b/doc/install.rst index 620ad836b..2744eb7b0 100644 --- a/doc/install.rst +++ b/doc/install.rst @@ -4,7 +4,14 @@ Installation ============ -scikit-optimize supports Python 3.5 or newer. +scikit-optimize requires: + +* Python >= 3.6 +* NumPy (>= 1.13.3) +* SciPy (>= 0.19.1) +* joblib (>= 0.11) +* scikit-learn >= 0.20 +* matplotlib >= 2.0.0 The newest release can be installed via pip: From a400df2910f5b1842b77a202fdc844721b6310f8 Mon Sep 17 00:00:00 2001 From: Ludovic Tiako Date: Thu, 18 Jun 2020 15:36:36 -0400 Subject: [PATCH 210/265] Add early stopper --- skopt/callbacks.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/skopt/callbacks.py b/skopt/callbacks.py index 8c5e6d980..a2adcb5a2 100644 --- a/skopt/callbacks.py +++ b/skopt/callbacks.py @@ -219,6 +219,25 @@ def _criterion(self, result): return None +class HollowIterationsStopper(EarlyStopper): + """ + Stop the optimization if the improvement over the last n_iterations is too small. + """ + + def __init__(self, n_iterations, threshold=0): + super(HollowIterationsStopper, self).__init__() + self.n_iterations = n_iterations + self.threshold = abs(threshold) + + def _criterion(self, result): + + if len(result.func_vals) <= self.n_iterations: + return False + + cummin = np.minimum.accumulate(result.func_vals) + return cummin[-self.n_iterations - 1] - cummin[-1] <= self.threshold + + class DeadlineStopper(EarlyStopper): """ Stop the optimization before running out of a fixed budget of time. 
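
Intended usage of the new HollowIterationsStopper, sketched below; it mirrors
the unit tests added in the following patch, and the objective and numbers
are illustrative only.

    from skopt import gp_minimize
    from skopt.callbacks import HollowIterationsStopper

    # Stop once 5 consecutive iterations fail to improve the best
    # observed objective value by more than 0.1.
    stopper = HollowIterationsStopper(n_iterations=5, threshold=0.1)
    res = gp_minimize(lambda x: x[0] ** 2, [(-2.0, 2.0)],
                      callback=stopper, n_calls=50, random_state=1)
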
From 9155da5710102ee546f3284266220946d96db0de Mon Sep 17 00:00:00 2001
From: Ludovic Tiako
Date: Thu, 18 Jun 2020 15:42:16 -0400
Subject: [PATCH 211/265] Add tests

---
 skopt/tests/test_callbacks.py | 44 +++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/skopt/tests/test_callbacks.py b/skopt/tests/test_callbacks.py
index de31de3f8..4ce788bf0 100644
--- a/skopt/tests/test_callbacks.py
+++ b/skopt/tests/test_callbacks.py
@@ -12,6 +12,7 @@
 from skopt.callbacks import DeltaYStopper
 from skopt.callbacks import DeadlineStopper
 from skopt.callbacks import CheckpointSaver
+from skopt.callbacks import HollowIterationsStopper
 from skopt.utils import load


@@ -47,6 +48,49 @@ def test_deadline_stopper():
     assert np.sum(deadline.iter_time) < deadline.total_time


+@pytest.mark.fast_test
+def test_hollow_iterations_stopper():
+    Result = namedtuple("Result", ["func_vals"])
+
+    hollow = HollowIterationsStopper(3, 0)
+    # will run at least n_iterations + 1 times
+    assert not hollow(Result([10, 11, 12]))
+    assert hollow(Result([10, 11, 12, 13]))
+
+    # a tie is not enough
+    assert hollow(Result([10, 11, 12, 10]))
+
+    # every time we make a new min, we then have n_iterations rounds to beat it
+    assert not hollow(Result([10, 9, 8, 7, 7, 7]))
+    assert hollow(Result([10, 9, 8, 7, 7, 7, 7]))
+
+    hollow = HollowIterationsStopper(3, 1.1)
+    assert not hollow(Result([10, 11, 12, 8.89]))
+    assert hollow(Result([10, 11, 12, 8.9]))
+
+    # individual improvement below threshold contribute
+    assert hollow(Result([10, 9.9, 9.8, 9.7]))
+    assert not hollow(Result([10, 9.5, 9, 8.5, 8, 7.5]))
+
+    hollow = HollowIterationsStopper(3, 0)
+    result = gp_minimize(
+        bench3, [(-1.0, 1.0)], callback=hollow, n_calls=100, random_state=1
+    )
+    assert len(result.func_vals) == 10
+
+    hollow = HollowIterationsStopper(3, 0.1)
+    result = gp_minimize(
+        bench3, [(-1.0, 1.0)], callback=hollow, n_calls=100, random_state=1
+    )
+    assert len(result.func_vals) == 5
+
+    hollow = HollowIterationsStopper(3, 0.2)
+    result = gp_minimize(
+        bench3, [(-1.0, 1.0)], callback=hollow, n_calls=100, random_state=1
+    )
+    assert len(result.func_vals) == 4
+
+
 @pytest.mark.fast_test
 def test_checkpoint_saver():
     checkpoint_path = "./test_checkpoint.pkl"

From 57c55a843173328274f3f0d29db96844b00b8388 Mon Sep 17 00:00:00 2001
From: Ludovic Tiako
Date: Thu, 18 Jun 2020 16:21:54 -0400
Subject: [PATCH 212/265] pep8 fix

---
 skopt/callbacks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/skopt/callbacks.py b/skopt/callbacks.py
index a2adcb5a2..4c31e6c39 100644
--- a/skopt/callbacks.py
+++ b/skopt/callbacks.py
@@ -221,7 +221,7 @@ def _criterion(self, result):

 class HollowIterationsStopper(EarlyStopper):
     """
-    Stop the optimization if the improvement over the last n_iterations is too small.
+    Stop if the improvement over the last n iterations is below a threshold.
     """

     def __init__(self, n_iterations, threshold=0):

From 1cee8b3b7b1b9b16afb7f5d6f12af2ca08907ee5 Mon Sep 17 00:00:00 2001
From: GCidd
Date: Tue, 7 Jul 2020 10:55:32 +0300
Subject: [PATCH 213/265] Doc line, logspace call, set_transformer default
 parameter typo, use_named_args test output fix and dimension normalization
 change

Starting from the doc fix:
Reading the docs for the BayesSearchCV parameters description for the
acceptable input types for the search_spaces parameter, I noticed that the
last acceptable type was on a wrong line, leading to some confusion. So,
(dict, int) is returned to the previous line.
Running pytest on the freshly downloaded dev branch yielded the following
errors (the fixes I applied are included):
In plots.py, in the plot_histogram method, np.logspace is called with the *
operator for multiple arguments (np.logspace(*np.log10(dimension.bounds),
bins)), which threw an error in the call. The parameter is also positioned
before another one, which is wrong; even if it worked otherwise, it should
have been last.
In utils.py, in the use_named_args method, pytest checks if the printed
values are the same as the expected ones. If pytest is run with python 2.7,
the "print ("Best fitness", result.fun)" command prints the values
differently (in a tuple, with the first item being the string and the second
the result value). This produces a false negative test result: the
result.fun value is correct, but the printed value is not exactly the same.
For this reason I changed both print commands to work in python 2.7 too.
In space/space.py, the set_transformer method in the Integer and Real
classes has a typo in the default transform parameter (should be "identity"
instead of "identitiy").
Lastly, with the new set_transformer method in the Dimension class (and the
classes that inherit from it), there is no need for the normalize_dimensions
method (in utils.py) to check the type of each dimension inside the space
object in order to reinitialize it with a different transform. Now we can
just call the set_transformer method of the Dimension object with the new
transform type, without the need for multiple type checks.
---
 skopt/plots.py       |  3 ++-
 skopt/searchcv.py    |  3 +--
 skopt/space/space.py |  4 ++--
 skopt/utils.py       | 33 ++++++++++-----------------------
 4 files changed, 15 insertions(+), 28 deletions(-)

diff --git a/skopt/plots.py b/skopt/plots.py
index f962cd780..0b803437e 100644
--- a/skopt/plots.py
+++ b/skopt/plots.py
@@ -1,3 +1,4 @@
+# -*- encoding: UTF-8 -*-
 """Plotting functions."""
 import sys
 import numpy as np
@@ -1242,7 +1243,7 @@ def plot_histogram(result, dimension_identifier, bins=20, rotate_labels=0,
     # in which case the histogram can be plotted more easily.
     if dimension.prior == 'log-uniform':
         # Map the number of bins to a log-space for the dimension bounds.
-        bins_mapped = np.logspace(*np.log10(dimension.bounds), bins)
+        bins_mapped = np.logspace(np.log10(dimension.bounds), bins)
     else:
         # Use the original number of bins.
         bins_mapped = bins
diff --git a/skopt/searchcv.py b/skopt/searchcv.py
index 765eea18c..b752d456a 100644
--- a/skopt/searchcv.py
+++ b/skopt/searchcv.py
@@ -53,8 +53,7 @@ class BayesSearchCV(BaseSearchCV):
         Either estimator needs to provide a ``score`` function,
         or ``scoring`` must be passed.

-    search_spaces : dict, list of dict or list of tuple containing
-        (dict, int).
+    search_spaces : dict, list of dict or list of tuple containing (dict, int).
         One of these cases:
         1. dictionary, where keys are parameter names (strings)
        and values are skopt.space.Dimension instances (Real, Integer
diff --git a/skopt/space/space.py b/skopt/space/space.py
index 4e0a0d59b..cf90fb867 100644
--- a/skopt/space/space.py
+++ b/skopt/space/space.py
@@ -278,7 +278,7 @@ def __init__(self, low, high, prior="uniform", base=10, transform=None,
             transform = "identity"
         self.set_transformer(transform)

-    def set_transformer(self, transform="identitiy"):
+    def set_transformer(self, transform="identity"):
         """Define rvs and transformer spaces.
Parameters @@ -466,7 +466,7 @@ def __init__(self, low, high, prior="uniform", base=10, transform=None, transform = "identity" self.set_transformer(transform) - def set_transformer(self, transform="identitiy"): + def set_transformer(self, transform="identity"): """Define _rvs and transformer spaces. Parameters diff --git a/skopt/utils.py b/skopt/utils.py index da6d54d0d..1af337803 100644 --- a/skopt/utils.py +++ b/skopt/utils.py @@ -370,7 +370,6 @@ def cook_estimator(base_estimator, space=None, **kwargs): space = Space(normalize_dimensions(space.dimensions)) n_dims = space.transformed_n_dims is_cat = space.is_categorical - else: raise ValueError("Expected a Space instance, not None.") @@ -594,26 +593,14 @@ def normalize_dimensions(dimensions): space = Space(dimensions) transformed_dimensions = [] for dimension in space.dimensions: - if isinstance(dimension, Categorical): - transformed_dimensions.append(Categorical(dimension.categories, - dimension.prior, - name=dimension.name, - transform="normalize")) - # To make sure that GP operates in the [0, 1] space - elif isinstance(dimension, Real): - transformed_dimensions.append( - Real(dimension.low, dimension.high, dimension.prior, - name=dimension.name, - transform="normalize", - dtype=dimension.dtype) - ) - elif isinstance(dimension, Integer): + # check if dimension is of a Dimension instance + if isinstance(dimension, Dimension): + # Change the transformer to normalize + # and add it to the new transformed dimensions + dimension.set_transformer("normalize") transformed_dimensions.append( - Integer(dimension.low, dimension.high, - name=dimension.name, - transform="normalize", - dtype=dimension.dtype) - ) + dimension + ) else: raise RuntimeError("Unknown dimension type " "(%s)" % type(dimension)) @@ -727,10 +714,10 @@ def use_named_args(dimensions): ... n_calls=20, base_estimator="ET", ... random_state=4) >>> - >>> # Print the best-found results. - >>> print("Best fitness:", result.fun) + >>> # Print the best-found results in same format as the expected result. + >>> print("Best fitness: " + str(result.fun)) Best fitness: 0.1948080835239698 - >>> print("Best parameters:", result.x) + >>> print("Best parameters: {}".format(result.x)) Best parameters: [0.44134853091052617, 0.06570954323368307, 0.17586123323419825] Parameters From f24dcc4cfe05b73aa22f94b6c4d9f7862fd8a663 Mon Sep 17 00:00:00 2001 From: GCidd Date: Tue, 7 Jul 2020 11:09:15 +0300 Subject: [PATCH 214/265] PEP8 fixes from last commit. 
--- skopt/utils.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/skopt/utils.py b/skopt/utils.py index 1af337803..a27e62c90 100644 --- a/skopt/utils.py +++ b/skopt/utils.py @@ -20,7 +20,6 @@ from .sampler import InitialPointGenerator from .space import Space, Categorical, Integer, Real, Dimension - __all__ = ( "load", "dump", @@ -593,10 +592,10 @@ def normalize_dimensions(dimensions): space = Space(dimensions) transformed_dimensions = [] for dimension in space.dimensions: - # check if dimension is of a Dimension instance + # check if dimension is of a Dimension instance if isinstance(dimension, Dimension): - # Change the transformer to normalize - # and add it to the new transformed dimensions + # Change the transformer to normalize + # and add it to the new transformed dimensions dimension.set_transformer("normalize") transformed_dimensions.append( dimension From c0bfdd0cf60cd88c5688cd3eeb94f40e75d0a4e6 Mon Sep 17 00:00:00 2001 From: lucasplagwitz Date: Tue, 1 Sep 2020 13:26:20 +0200 Subject: [PATCH 215/265] travis version update, compatible with scikit-learn==0.23.2 --- .travis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index d23c84c19..37be15dd7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -37,11 +37,11 @@ matrix: NUMPY_VERSION="1.16.0" SCIPY_VERSION="1.2.0" PYAML_VERSION="17.8.0" SCIKIT_LEARN_VERSION="0.22.1" MATPLOTLIB_VERSION="*" COVERAGE="true" JOBLIB_VERSION="0.13" - - name: "Python 3.8 - scikit 0.23.0" + - name: "Python 3.8 - scikit 0.23.2" python: "3.7" env: DISTRIB="conda" PYTHON_VERSION="3.8.1" COVERAGE="false" - NUMPY_VERSION="1.18.1" SCIPY_VERSION="1.4.1" PYAML_VERSION="19.12.0" - SCIKIT_LEARN_VERSION="0.23.0" JOBLIB_VERSION="0.14.1" + NUMPY_VERSION="1.19.1" SCIPY_VERSION="1.5.2" PYAML_VERSION="20.4.0" + SCIKIT_LEARN_VERSION="0.23.2" JOBLIB_VERSION="0.16.0" - name: "Python 3.7 - sdist check" python: "3.7" env: DISTRIB="conda" PYTHON_VERSION="3.7" From 37cc84d6429997ee67f6db9e7b267d27fe2fdf7c Mon Sep 17 00:00:00 2001 From: lucasplagwitz Date: Tue, 1 Sep 2020 16:57:04 +0200 Subject: [PATCH 216/265] test adaptations for GP in sklearn >=0.23 --- skopt/searchcv.py | 3 +-- skopt/tests/test_acquisition.py | 7 ++----- skopt/tests/test_common.py | 22 +++++++++++----------- skopt/tests/test_gp_opt.py | 23 ++++++++++++----------- skopt/tests/test_optimizer.py | 6 ++---- skopt/tests/test_plots.py | 8 ++------ skopt/tests/test_searchcv.py | 4 +--- skopt/tests/test_space.py | 9 +++++---- skopt/tests/test_utils.py | 5 +---- 9 files changed, 37 insertions(+), 50 deletions(-) diff --git a/skopt/searchcv.py b/skopt/searchcv.py index 765eea18c..ecf6c201c 100644 --- a/skopt/searchcv.py +++ b/skopt/searchcv.py @@ -475,8 +475,7 @@ def _store(key_name, array, weights=None, splits=False, rank=False): # Use one MaskedArray and mask all the places where the param is not # applicable for that candidate. 
Use defaultdict as each candidate may # not contain all the params - param_results = defaultdict(partial( - MaskedArray, + param_results = defaultdict(partial(np.ma.array, np.empty(n_candidates,), mask=True, dtype=object)) diff --git a/skopt/tests/test_acquisition.py b/skopt/tests/test_acquisition.py index 0a6300a8b..13117dcbe 100644 --- a/skopt/tests/test_acquisition.py +++ b/skopt/tests/test_acquisition.py @@ -1,4 +1,3 @@ -from math import log import numpy as np import pytest @@ -26,7 +25,6 @@ def predict(self, X, return_std=True): X = np.array(X) return np.zeros(X.shape[0]), np.ones(X.shape[0]) - # This is used to test that given constant acquisition values at # different points, acquisition functions "EIps" and "PIps" # prefer candidate points that take lesser time. @@ -44,8 +42,7 @@ def fit(self, X, y): models the logarithm of the time. """ X = np.array(X) - y = np.array(y) - gpr = cook_estimator("GP", self.space, random_state=0) + gpr = cook_estimator("GP", self.space, normalize_y=False) gpr.fit(X, np.log(np.ravel(X))) self.estimators_ = [] self.estimators_.append(ConstSurrogate()) @@ -146,7 +143,7 @@ def test_acquisition_per_second(acq_func): def test_gaussian_acquisition_check_inputs(): model = ConstantGPRSurrogate(Space(((1.0, 9.0),))) with pytest.raises(ValueError) as err: - vals = _gaussian_acquisition(np.arange(1, 5), model) + _gaussian_acquisition(np.arange(1, 5), model) assert("it must be 2-dimensional" in err.value.args[0]) diff --git a/skopt/tests/test_common.py b/skopt/tests/test_common.py index ac9cbdf78..4feae300f 100644 --- a/skopt/tests/test_common.py +++ b/skopt/tests/test_common.py @@ -224,7 +224,7 @@ def test_init_points_and_models(n_initial_points, optimizer_func): @pytest.mark.slow_test -@pytest.mark.parametrize("n_initial_points", [0, 5]) +@pytest.mark.parametrize("n_initial_points", [2, 5]) @pytest.mark.parametrize("optimizer_func", [gp_minimize, forest_minimize, gbrt_minimize]) def test_init_vals(n_initial_points, optimizer_func): @@ -247,26 +247,26 @@ def test_init_vals_dummy_minimize(): @pytest.mark.slow_test @pytest.mark.parametrize("optimizer", [ dummy_minimize, - partial(gp_minimize, n_initial_points=0), - partial(forest_minimize, n_initial_points=0), - partial(gbrt_minimize, n_initial_points=0)]) + partial(gp_minimize, n_initial_points=3), + partial(forest_minimize, n_initial_points=3), + partial(gbrt_minimize, n_initial_points=3)]) def test_categorical_init_vals(optimizer): space = [("-2", "-1", "0", "1", "2")] x0 = [["0"], ["1"], ["2"]] - n_calls = 4 + n_calls = 6 check_init_vals(optimizer, bench4, space, x0, n_calls) @pytest.mark.slow_test @pytest.mark.parametrize("optimizer", [ dummy_minimize, - partial(gp_minimize, n_initial_points=0), - partial(forest_minimize, n_initial_points=0), - partial(gbrt_minimize, n_initial_points=0)]) + partial(gp_minimize, n_initial_points=2), + partial(forest_minimize, n_initial_points=2), + partial(gbrt_minimize, n_initial_points=2)]) def test_mixed_spaces(optimizer): space = [("-2", "-1", "0", "1", "2"), (-2.0, 2.0)] x0 = [["0", 2.0], ["1", 1.0], ["2", 1.0]] - n_calls = 4 + n_calls = 5 check_init_vals(optimizer, bench5, space, x0, n_calls) @@ -420,7 +420,7 @@ def test_early_stopping_delta_x_empty_result_object(minimizer): callback=DeltaXStopper(0.1), dimensions=[(-1., 1.)], n_calls=n_calls, - n_initial_points=1, random_state=1) + n_initial_points=2, random_state=1) assert len(res.x_iters) < n_calls @@ -433,6 +433,6 @@ def bench1_with_time(x): n_calls = 3 res = minimizer(bench1_with_time, [(-2.0, 2.0)], - 
acq_func=acq_func, n_calls=n_calls, n_initial_points=1, + acq_func=acq_func, n_calls=n_calls, n_initial_points=2, random_state=1) assert len(res.log_time) == n_calls diff --git a/skopt/tests/test_gp_opt.py b/skopt/tests/test_gp_opt.py index ffd5f356f..6b5192f30 100644 --- a/skopt/tests/test_gp_opt.py +++ b/skopt/tests/test_gp_opt.py @@ -1,3 +1,4 @@ +import numpy as np from numpy.testing import assert_array_equal import pytest @@ -7,7 +8,7 @@ from skopt.benchmarks import bench3 from skopt.benchmarks import bench4 from skopt.benchmarks import branin -from skopt.space.space import Real, Integer, Categorical, Space +from skopt.space.space import Real, Categorical, Space from skopt.utils import cook_estimator @@ -65,17 +66,17 @@ def test_gp_minimize_bench3(search, acq): def test_gp_minimize_bench4(search, acq): # this particular random_state picks "2" twice so we can make an extra # call to the objective without repeating options - check_minimize(bench4, 0.0, - [("-2", "-1", "0", "1", "2")], search, acq, 1.05, 6, 2) + check_minimize(bench4, 0, + [("-2", "-1", "0", "1", "2")], search, acq, 1.05, 20) @pytest.mark.fast_test def test_n_jobs(): r_single = gp_minimize(bench3, [(-2.0, 2.0)], acq_optimizer="lbfgs", - acq_func="EI", n_calls=2, n_initial_points=1, + acq_func="EI", n_calls=4, n_initial_points=2, random_state=1, noise=1e-10) r_double = gp_minimize(bench3, [(-2.0, 2.0)], acq_optimizer="lbfgs", - acq_func="EI", n_calls=2, n_initial_points=1, + acq_func="EI", n_calls=4, n_initial_points=2, random_state=1, noise=1e-10, n_jobs=2) assert_array_equal(r_single.x_iters, r_double.x_iters) @@ -83,7 +84,7 @@ def test_n_jobs(): @pytest.mark.fast_test def test_gpr_default(): """Smoke test that gp_minimize does not fail for default values.""" - gp_minimize(branin, ((-5.0, 10.0), (0.0, 15.0)), n_initial_points=1, + gp_minimize(branin, ((-5.0, 10.0), (0.0, 15.0)), n_initial_points=2, n_calls=2) @@ -95,7 +96,7 @@ def test_use_given_estimator(): noise_correct = 1e+5 noise_fake = 1e-10 estimator = cook_estimator("GP", domain, noise=noise_correct) - res = gp_minimize(branin, domain, n_calls=1, n_initial_points=1, + res = gp_minimize(branin, domain, n_calls=4, n_initial_points=2, base_estimator=estimator, noise=noise_fake) assert res['models'][-1].noise == noise_correct @@ -109,7 +110,7 @@ def test_use_given_estimator_with_max_model_size(): noise_correct = 1e+5 noise_fake = 1e-10 estimator = cook_estimator("GP", domain, noise=noise_correct) - res = gp_minimize(branin, domain, n_calls=1, n_initial_points=1, + res = gp_minimize(branin, domain, n_calls=4, n_initial_points=2, base_estimator=estimator, noise=noise_fake, model_queue_size=1) assert len(res['models']) == 1 @@ -119,10 +120,10 @@ def test_use_given_estimator_with_max_model_size(): @pytest.mark.fast_test def test_categorical_integer(): def f(params): - return 0 + return np.random.uniform() dims = [[1]] - res = gp_minimize(f, dims, n_calls=1, n_initial_points=1, + res = gp_minimize(f, dims, n_calls=2, n_initial_points=2, random_state=1) assert res.x_iters[0][0] == dims[0][0] @@ -143,7 +144,7 @@ def objective(param_list): loss = int(x) + y * z return loss - res = gp_minimize(objective, space, n_calls=12, random_state=1, + res = gp_minimize(objective, space, n_calls=20, random_state=1, initial_point_generator=initgen) assert res["x"] in [['1', 4, 1.0], ['2', 4, 1.0]] diff --git a/skopt/tests/test_optimizer.py b/skopt/tests/test_optimizer.py index d318b0e82..ff9c4ee38 100644 --- a/skopt/tests/test_optimizer.py +++ b/skopt/tests/test_optimizer.py @@ -300,7 
+300,7 @@ def test_optimizer_base_estimator_string_invalid(): @pytest.mark.parametrize("base_estimator", ESTIMATOR_STRINGS) def test_optimizer_base_estimator_string_smoke(base_estimator): opt = Optimizer([(-2.0, 2.0)], base_estimator=base_estimator, - n_initial_points=1, acq_func="EI") + n_initial_points=2, acq_func="EI") opt.run(func=lambda x: x[0]**2, n_iter=3) @@ -322,8 +322,6 @@ def test_defaults_are_equivalent(): x = opt.ask() res_opt = opt.tell(x, branin(x)) - - #res_min = forest_minimize(branin, space, n_calls=12, random_state=1) res_min = gp_minimize(branin, space, n_calls=12, random_state=1) @@ -344,7 +342,7 @@ def test_dimensions_names(): space = [Real(0, 1, name='real'), Categorical(['a', 'b', 'c'], name='cat'), Integer(0, 1, name='int')] - opt = Optimizer(space, n_initial_points=1) + opt = Optimizer(space, n_initial_points=2) # result of the optimizer missing dimension names result = opt.tell([(0.5, 'a', 0.5)], [3]) names = [] diff --git a/skopt/tests/test_plots.py b/skopt/tests/test_plots.py index aae53dca2..28333a42a 100644 --- a/skopt/tests/test_plots.py +++ b/skopt/tests/test_plots.py @@ -4,7 +4,6 @@ from sklearn.datasets import load_breast_cancer from sklearn.tree import DecisionTreeClassifier from sklearn.model_selection import cross_val_score -from numpy.testing import assert_raises from numpy.testing import assert_array_almost_equal from skopt.space import Integer, Categorical from skopt import plots, gp_minimize @@ -76,9 +75,6 @@ def objective(params): x_min, f_min = expected_minimum_random_sampling(res, random_state=1) x_min2, f_min2 = expected_minimum(res, random_state=1) - x_min, f_min = expected_minimum_random_sampling(res, random_state=1) - x_min2, f_min2 = expected_minimum(res, random_state=1) - assert x_min == x_min2 assert f_min == f_min2 @@ -168,10 +164,10 @@ def objective(x, noise_level=0.1): np.random.randn() * noise_level # Initialize Optimizer - opt = Optimizer([(-2.0, 2.0)], n_initial_points=1) + opt = Optimizer([(-2.0, 2.0)], n_initial_points=2) # Optimize - for i in range(2): + for i in range(3): next_x = opt.ask() f_val = objective(next_x) res = opt.tell(next_x, f_val) diff --git a/skopt/tests/test_searchcv.py b/skopt/tests/test_searchcv.py index e1a2ca5f5..67c15eee9 100644 --- a/skopt/tests/test_searchcv.py +++ b/skopt/tests/test_searchcv.py @@ -3,7 +3,6 @@ """ import pytest -import time from sklearn.datasets import load_iris, make_classification from sklearn.model_selection import train_test_split @@ -12,7 +11,6 @@ from sklearn.tree import DecisionTreeClassifier from sklearn.base import clone from sklearn.base import BaseEstimator -from joblib import cpu_count from scipy.stats import rankdata import numpy as np from numpy.testing import assert_array_equal @@ -449,7 +447,7 @@ def fit(self, X, y): return self def score(self, X, y): - return 0.0 + return np.random.uniform() # Below is example code that used to not work. 
X, y = make_classification(10, 4) diff --git a/skopt/tests/test_space.py b/skopt/tests/test_space.py index e70c43f79..bd93e4e1a 100644 --- a/skopt/tests/test_space.py +++ b/skopt/tests/test_space.py @@ -761,11 +761,12 @@ def test_dimension_with_invalid_names(name): def test_purely_categorical_space(): # Test reproduces the bug in #908, make sure it doesn't come back dims = [Categorical(['a', 'b', 'c']), Categorical(['A', 'B', 'C'])] - optimizer = Optimizer(dims, n_initial_points=1, random_state=3) + optimizer = Optimizer(dims, n_initial_points=2, random_state=3) - x = optimizer.ask() - # before the fix this call raised an exception - optimizer.tell(x, 1.) + for _ in range(2): + x = optimizer.ask() + # before the fix this call raised an exception + optimizer.tell(x, np.random.uniform()) @pytest.mark.fast_test diff --git a/skopt/tests/test_utils.py b/skopt/tests/test_utils.py index 53e003d9d..2e1c22fa7 100644 --- a/skopt/tests/test_utils.py +++ b/skopt/tests/test_utils.py @@ -26,9 +26,6 @@ from skopt.utils import check_list_types from skopt.utils import check_dimension_names from skopt.space import Real, Integer, Categorical -from sklearn.datasets import load_breast_cancer -from sklearn.tree import DecisionTreeClassifier -from sklearn.model_selection import cross_val_score def check_optimization_results_equality(res_1, res_2): @@ -48,7 +45,7 @@ def test_dump_and_load(): x0=[0.], acq_func="LCB", n_calls=2, - n_random_starts=0, + n_random_starts=1, random_state=1) # Test normal dumping and loading From e535d02f99e52cfb8b89a5d5878482d8b050aa0b Mon Sep 17 00:00:00 2001 From: lucasplagwitz Date: Tue, 1 Sep 2020 22:27:56 +0200 Subject: [PATCH 217/265] enable matplotlib 'Agg' for sphinx, increase initial_points in some examples --- doc/conf.py | 4 +++- examples/exploration-vs-exploitation.py | 20 ++++++++++---------- examples/interruptible-optimization.py | 4 ++-- examples/sklearn-gridsearchcv-replacement.py | 6 +++--- examples/store-and-load-results.py | 2 +- 5 files changed, 19 insertions(+), 17 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index cb35b4318..da28ff28a 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -19,6 +19,9 @@ import warnings import os import re +import matplotlib +matplotlib.use('Agg') +import sphinx_gallery from packaging.version import parse # import pkg_resources import sys @@ -26,7 +29,6 @@ sys.path.insert(0, os.path.abspath('sphinxext')) from github_link import make_linkcode_resolve -import sphinx_gallery # __version__ = pkg_resources.get_distribution('skopt').version on_rtd = os.environ.get('READTHEDOCS', None) == 'True' diff --git a/examples/exploration-vs-exploitation.py b/examples/exploration-vs-exploitation.py index bbabd0884..a4a3d1994 100644 --- a/examples/exploration-vs-exploitation.py +++ b/examples/exploration-vs-exploitation.py @@ -61,7 +61,7 @@ def objective_wo_noise(x): ############################################################################# -opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points = 1, +opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points = 3, acq_optimizer="sampling") ############################################################################# @@ -90,7 +90,7 @@ def objective_wo_noise(x): acq_func_kwargs = {"xi": 10000, "kappa": 10000} ############################################################################# -opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=1, +opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=3, acq_optimizer="sampling", acq_func_kwargs=acq_func_kwargs) 
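#############################################################################
# For orientation, a minimal sketch of where ``kappa`` and ``xi`` act, using
# the textbook forms of the acquisition functions (an illustration only, not
# skopt's internal implementation):

from scipy.stats import norm


def lcb(mu, std, kappa):
    # Larger kappa weights the uncertainty term more heavily -> exploration.
    return mu - kappa * std


def expected_improvement(mu, std, y_best, xi):
    # Larger xi demands a bigger improvement over y_best -> exploration.
    imp = y_best - mu - xi
    z = imp / std
    return imp * norm.cdf(z) + std * norm.pdf(z)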
############################################################################# @@ -101,7 +101,7 @@ def objective_wo_noise(x): # # This works both for kappa when using acq_func="LCB": -opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=1, +opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=3, acq_func="LCB", acq_optimizer="sampling", acq_func_kwargs=acq_func_kwargs) ############################################################################# @@ -110,7 +110,7 @@ def objective_wo_noise(x): ############################################################################# # And for xi when using acq_func="EI": or acq_func="PI": -opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=1, +opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=3, acq_func="PI", acq_optimizer="sampling", acq_func_kwargs=acq_func_kwargs) ############################################################################# @@ -120,21 +120,21 @@ def objective_wo_noise(x): # We can also favor exploitaton: acq_func_kwargs = {"xi": 0.000001, "kappa": 0.001} ############################################################################# -opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=1, +opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=3, acq_func="LCB", acq_optimizer="sampling", acq_func_kwargs=acq_func_kwargs) ############################################################################# opt.run(objective, n_iter=20) _ = plot_gaussian_process(opt.get_result(), **plot_args) ############################################################################# -opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=1, +opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=3, acq_func="EI", acq_optimizer="sampling", acq_func_kwargs=acq_func_kwargs) ############################################################################# opt.run(objective, n_iter=20) _ = plot_gaussian_process(opt.get_result(), **plot_args) ############################################################################# -opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=1, +opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=3, acq_func="PI", acq_optimizer="sampling", acq_func_kwargs=acq_func_kwargs) ############################################################################# @@ -147,14 +147,14 @@ def objective_wo_noise(x): acq_func_kwargs = {"xi": -1000000000000} ############################################################################# -opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=1, +opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=3, acq_func="PI", acq_optimizer="sampling", acq_func_kwargs=acq_func_kwargs) ############################################################################# opt.run(objective, n_iter=20) _ = plot_gaussian_process(opt.get_result(), **plot_args) ############################################################################# -opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=1, +opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=3, acq_func="EI", acq_optimizer="sampling", acq_func_kwargs=acq_func_kwargs) ############################################################################# @@ -169,7 +169,7 @@ def objective_wo_noise(x): # recalculated. 
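# (With kappa set to 0 the lower confidence bound collapses to the posterior
# mean, lcb = mu - 0 * std == mu, so the optimizer purely exploits the
# model's current estimate and ignores its predictive uncertainty.)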
acq_func_kwargs = {"kappa": 0} ############################################################################# -opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=1, +opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=3, acq_func="LCB", acq_optimizer="sampling", acq_func_kwargs=acq_func_kwargs) ############################################################################# diff --git a/examples/interruptible-optimization.py b/examples/interruptible-optimization.py index a4a0dff20..65e927232 100644 --- a/examples/interruptible-optimization.py +++ b/examples/interruptible-optimization.py @@ -68,7 +68,7 @@ def obj_fun(x, noise_level=noise_level): x0=[-20.], # the starting point acq_func="LCB", # the acquisition function (optional) n_calls=10, # the number of evaluations of f including at x0 - n_random_starts=0, # the number of random initialization points + n_random_starts=3, # the number of random initialization points callback=[checkpoint_saver], # a list of callbacks including the checkpoint saver random_state=777); @@ -112,7 +112,7 @@ def obj_fun(x, noise_level=noise_level): y0=y0, # observed values for x0 acq_func="LCB", # the acquisition function (optional) n_calls=10, # the number of evaluations of f including at x0 - n_random_starts=0, # the number of random initialization points + n_random_starts=3, # the number of random initialization points callback=[checkpoint_saver], random_state=777); diff --git a/examples/sklearn-gridsearchcv-replacement.py b/examples/sklearn-gridsearchcv-replacement.py index 52a75a334..727777a86 100644 --- a/examples/sklearn-gridsearchcv-replacement.py +++ b/examples/sklearn-gridsearchcv-replacement.py @@ -49,7 +49,7 @@ from sklearn.svm import SVC from sklearn.model_selection import train_test_split -X, y = load_digits(10, True) +X, y = load_digits(n_class=10, return_X_y=True) X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.75, test_size=.25, random_state=0) # log-uniform: understand as search over p = exp(x) by varying x @@ -88,7 +88,7 @@ from sklearn.pipeline import Pipeline from sklearn.model_selection import train_test_split -X, y = load_digits(10, True) +X, y = load_digits(n_class=10, return_X_y=True) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) # pipeline class is used as estimator to enable @@ -163,7 +163,7 @@ from sklearn.datasets import load_iris from sklearn.svm import SVC -X, y = load_iris(True) +X, y = load_iris(return_X_y=True) searchcv = BayesSearchCV( SVC(gamma='scale'), diff --git a/examples/store-and-load-results.py b/examples/store-and-load-results.py index c952f3d11..97e491c0e 100644 --- a/examples/store-and-load-results.py +++ b/examples/store-and-load-results.py @@ -60,7 +60,7 @@ def obj_fun(x, noise_level=noise_level): x0=[0.], # the starting point acq_func="LCB", # the acquisition function (optional) n_calls=15, # the number of evaluations of f including at x0 - n_random_starts=0, # the number of random initialization points + n_random_starts=3, # the number of random initialization points random_state=777) ############################################################################# From a30bddf7397d571aefb2c611b6e56d16e265bd94 Mon Sep 17 00:00:00 2001 From: lucasplagwitz Date: Tue, 1 Sep 2020 22:46:19 +0200 Subject: [PATCH 218/265] replace sphinx, cover pep8 --- doc/conf.py | 6 ++--- examples/exploration-vs-exploitation.py | 3 ++- examples/interruptible-optimization.py | 32 ++++++++++++------------- examples/store-and-load-results.py | 2 +- 4 files changed, 22 insertions(+), 21 
deletions(-) diff --git a/doc/conf.py b/doc/conf.py index da28ff28a..3a842424e 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -19,9 +19,6 @@ import warnings import os import re -import matplotlib -matplotlib.use('Agg') -import sphinx_gallery from packaging.version import parse # import pkg_resources import sys @@ -29,6 +26,9 @@ sys.path.insert(0, os.path.abspath('sphinxext')) from github_link import make_linkcode_resolve +import matplotlib +matplotlib.use('Agg') +import sphinx_gallery # __version__ = pkg_resources.get_distribution('skopt').version on_rtd = os.environ.get('READTHEDOCS', None) == 'True' diff --git a/examples/exploration-vs-exploitation.py b/examples/exploration-vs-exploitation.py index a4a3d1994..3a0f0b872 100644 --- a/examples/exploration-vs-exploitation.py +++ b/examples/exploration-vs-exploitation.py @@ -49,6 +49,7 @@ noise_level = 0.1 + # Our 1D toy problem, this is the function we are trying to # minimize def objective(x, noise_level=noise_level): @@ -61,7 +62,7 @@ def objective_wo_noise(x): ############################################################################# -opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points = 3, +opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=3, acq_optimizer="sampling") ############################################################################# diff --git a/examples/interruptible-optimization.py b/examples/interruptible-optimization.py index 65e927232..d3ea368e8 100644 --- a/examples/interruptible-optimization.py +++ b/examples/interruptible-optimization.py @@ -63,14 +63,14 @@ def obj_fun(x, noise_level=noise_level): checkpoint_saver = CheckpointSaver("./checkpoint.pkl", compress=9) # keyword arguments will be passed to `skopt.dump` -gp_minimize(obj_fun, # the function to minimize - [(-20.0, 20.0)], # the bounds on each dimension of x - x0=[-20.], # the starting point - acq_func="LCB", # the acquisition function (optional) - n_calls=10, # the number of evaluations of f including at x0 - n_random_starts=3, # the number of random initialization points - callback=[checkpoint_saver], # a list of callbacks including the checkpoint saver - random_state=777); +gp_minimize(obj_fun, # the function to minimize + [(-20.0, 20.0)], # the bounds on each dimension of x + x0=[-20.], # the starting point + acq_func="LCB", # the acquisition function (optional) + n_calls=10, # the number of evaluations of f including at x0 + n_random_starts=3, # the number of random initial points + callback=[checkpoint_saver], # a list of callbacks including the checkpoint saver + random_state=777); ############################################################################# # Now let's assume this did not finish at once but took some long time: you @@ -107,14 +107,14 @@ def obj_fun(x, noise_level=noise_level): y0 = res.func_vals gp_minimize(obj_fun, # the function to minimize - [(-20.0, 20.0)], # the bounds on each dimension of x - x0=x0, # already examined values for x - y0=y0, # observed values for x0 - acq_func="LCB", # the acquisition function (optional) - n_calls=10, # the number of evaluations of f including at x0 - n_random_starts=3, # the number of random initialization points - callback=[checkpoint_saver], - random_state=777); + [(-20.0, 20.0)], # the bounds on each dimension of x + x0=x0, # already examined values for x + y0=y0, # observed values for x0 + acq_func="LCB", # the acquisition function (optional) + n_calls=10, # the number of evaluations of f including at x0 + n_random_starts=3, # the number of random initialization points + 
callback=[checkpoint_saver], + random_state=777); ############################################################################# # Possible problems diff --git a/examples/store-and-load-results.py b/examples/store-and-load-results.py index 97e491c0e..e2ae4e6d4 100644 --- a/examples/store-and-load-results.py +++ b/examples/store-and-load-results.py @@ -60,7 +60,7 @@ def obj_fun(x, noise_level=noise_level): x0=[0.], # the starting point acq_func="LCB", # the acquisition function (optional) n_calls=15, # the number of evaluations of f including at x0 - n_random_starts=3, # the number of random initialization points + n_random_starts=3, # the number of random initial points random_state=777) ############################################################################# From b9c1bb1a69d8cdf04ee4d374fd66483186531641 Mon Sep 17 00:00:00 2001 From: lucasplagwitz Date: Wed, 2 Sep 2020 00:16:07 +0200 Subject: [PATCH 219/265] set matplotlib.backend by export --- build_tools/circle/build_doc.sh | 1 + doc/conf.py | 12 +++++------- examples/exploration-vs-exploitation.py | 3 +-- examples/interruptible-optimization.py | 21 +++++++++++---------- 4 files changed, 18 insertions(+), 19 deletions(-) diff --git a/build_tools/circle/build_doc.sh b/build_tools/circle/build_doc.sh index 8649463cd..e293f346b 100755 --- a/build_tools/circle/build_doc.sh +++ b/build_tools/circle/build_doc.sh @@ -174,6 +174,7 @@ conda create -n $CONDA_ENV_NAME --yes --quiet python="${PYTHON_VERSION:-*}" \ scikit-image="${SCIKIT_IMAGE_VERSION:-*}" pandas="${PANDAS_VERSION:-*}" \ joblib memory_profiler packaging +export MPLBACKEND="agg" source activate testenv pip install sphinx-gallery pip install numpydoc diff --git a/doc/conf.py b/doc/conf.py index 3a842424e..793851cc5 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -15,20 +15,18 @@ # import os # import sys # sys.path.insert(0, os.path.abspath('.')) +import sys +import os +sys.path.insert(0, os.path.abspath('sphinxext')) +import sphinx_gallery import warnings -import os import re from packaging.version import parse # import pkg_resources -import sys import skopt - -sys.path.insert(0, os.path.abspath('sphinxext')) from github_link import make_linkcode_resolve -import matplotlib -matplotlib.use('Agg') -import sphinx_gallery + # __version__ = pkg_resources.get_distribution('skopt').version on_rtd = os.environ.get('READTHEDOCS', None) == 'True' diff --git a/examples/exploration-vs-exploitation.py b/examples/exploration-vs-exploitation.py index 3a0f0b872..c00fd205f 100644 --- a/examples/exploration-vs-exploitation.py +++ b/examples/exploration-vs-exploitation.py @@ -59,12 +59,11 @@ def objective(x, noise_level=noise_level): def objective_wo_noise(x): return objective(x, noise_level=0) - ############################################################################# + opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=3, acq_optimizer="sampling") - ############################################################################# # Plotting parameters diff --git a/examples/interruptible-optimization.py b/examples/interruptible-optimization.py index d3ea368e8..b10b71c8f 100644 --- a/examples/interruptible-optimization.py +++ b/examples/interruptible-optimization.py @@ -63,14 +63,15 @@ def obj_fun(x, noise_level=noise_level): checkpoint_saver = CheckpointSaver("./checkpoint.pkl", compress=9) # keyword arguments will be passed to `skopt.dump` -gp_minimize(obj_fun, # the function to minimize - [(-20.0, 20.0)], # the bounds on each dimension of x - x0=[-20.], # the starting point - acq_func="LCB", # 
the acquisition function (optional) - n_calls=10, # the number of evaluations of f including at x0 - n_random_starts=3, # the number of random initial points - callback=[checkpoint_saver], # a list of callbacks including the checkpoint saver - random_state=777); +gp_minimize(obj_fun, # the function to minimize + [(-20.0, 20.0)], # the bounds on each dimension of x + x0=[-20.], # the starting point + acq_func="LCB", # the acquisition function (optional) + n_calls=10, # number of evaluations of f including at x0 + n_random_starts=3, # the number of random initial points + callback=[checkpoint_saver], + # a list of callbacks including the checkpoint saver + random_state=777) ############################################################################# # Now let's assume this did not finish at once but took some long time: you @@ -111,10 +112,10 @@ def obj_fun(x, noise_level=noise_level): x0=x0, # already examined values for x y0=y0, # observed values for x0 acq_func="LCB", # the acquisition function (optional) - n_calls=10, # the number of evaluations of f including at x0 + n_calls=10, # number of evaluations of f including at x0 n_random_starts=3, # the number of random initialization points callback=[checkpoint_saver], - random_state=777); + random_state=777) ############################################################################# # Possible problems From fec9ff6313814491d2a4edc16bbd4e82b558f8ed Mon Sep 17 00:00:00 2001 From: lucasplagwitz Date: Wed, 2 Sep 2020 00:19:25 +0200 Subject: [PATCH 220/265] revert conf.py imports --- doc/conf.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 793851cc5..b0477a7df 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -17,15 +17,14 @@ # sys.path.insert(0, os.path.abspath('.')) import sys import os -sys.path.insert(0, os.path.abspath('sphinxext')) -import sphinx_gallery - import warnings import re from packaging.version import parse # import pkg_resources import skopt from github_link import make_linkcode_resolve +sys.path.insert(0, os.path.abspath('sphinxext')) +import sphinx_gallery # __version__ = pkg_resources.get_distribution('skopt').version From f3425618c214e19ef633969b7b2b081be31f80a5 Mon Sep 17 00:00:00 2001 From: lucasplagwitz Date: Wed, 2 Sep 2020 00:25:57 +0200 Subject: [PATCH 221/265] revert doc/conf.py II --- doc/conf.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index b0477a7df..370802f18 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -15,15 +15,16 @@ # import os # import sys # sys.path.insert(0, os.path.abspath('.')) -import sys -import os import warnings +import os import re from packaging.version import parse # import pkg_resources +import sys import skopt -from github_link import make_linkcode_resolve + sys.path.insert(0, os.path.abspath('sphinxext')) +from github_link import make_linkcode_resolve import sphinx_gallery From de41c43518dc15630d16c114604667d79c919b2a Mon Sep 17 00:00:00 2001 From: Holger Nahrstaedt Date: Thu, 3 Sep 2020 14:08:33 +0200 Subject: [PATCH 222/265] Sphinx-gallery now accepts pickled objects --- doc/requirements.txt | 2 +- examples/interruptible-optimization.py | 15 ++------------- 2 files changed, 3 insertions(+), 14 deletions(-) diff --git a/doc/requirements.txt b/doc/requirements.txt index 1ab1b285d..4713469ee 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -4,5 +4,5 @@ matplotlib pandas ipywidgets sphinx -sphinx-gallery +sphinx-gallery>=0.6 numpydoc diff --git
a/examples/interruptible-optimization.py b/examples/interruptible-optimization.py index b10b71c8f..0db37af43 100644 --- a/examples/interruptible-optimization.py +++ b/examples/interruptible-optimization.py @@ -32,11 +32,6 @@ np.random.seed(777) import os -# The followings are hacks to allow sphinx-gallery to run the example. -sys.path.insert(0, os.getcwd()) -main_dir = os.path.basename(sys.modules['__main__'].__file__) -IS_RUN_WITH_SPHINX_GALLERY = main_dir != os.getcwd() - ############################################################################# # Simple example # ============== @@ -52,14 +47,8 @@ noise_level = 0.1 -if IS_RUN_WITH_SPHINX_GALLERY: - # When this example is run with sphinx gallery, it breaks the pickling - # capacity for multiprocessing backend so we have to modify the way we - # define our functions. This has nothing to do with the example. - from utils import obj_fun -else: - def obj_fun(x, noise_level=noise_level): - return np.sin(5 * x[0]) * (1 - np.tanh(x[0] ** 2)) + np.random.randn() * noise_level +def obj_fun(x, noise_level=noise_level): + return np.sin(5 * x[0]) * (1 - np.tanh(x[0] ** 2)) + np.random.randn() * noise_level checkpoint_saver = CheckpointSaver("./checkpoint.pkl", compress=9) # keyword arguments will be passed to `skopt.dump` From ecaef5efc1685b3958909becbcae24461128d35c Mon Sep 17 00:00:00 2001 From: Holger Nahrstaedt Date: Thu, 3 Sep 2020 14:12:44 +0200 Subject: [PATCH 223/265] Fix linting --- examples/interruptible-optimization.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/interruptible-optimization.py b/examples/interruptible-optimization.py index 0db37af43..0d39ecf2b 100644 --- a/examples/interruptible-optimization.py +++ b/examples/interruptible-optimization.py @@ -47,8 +47,10 @@ noise_level = 0.1 + def obj_fun(x, noise_level=noise_level): - return np.sin(5 * x[0]) * (1 - np.tanh(x[0] ** 2)) + np.random.randn() * noise_level + return np.sin(5 * x[0]) * (1 - np.tanh(x[0] ** 2)) + np.random.randn() \ + * noise_level checkpoint_saver = CheckpointSaver("./checkpoint.pkl", compress=9) # keyword arguments will be passed to `skopt.dump` From 09bafcdf2c6c5b57f56359f0fa85855b2330648a Mon Sep 17 00:00:00 2001 From: Holger Nahrstaedt Date: Thu, 3 Sep 2020 15:35:28 +0200 Subject: [PATCH 224/265] Fix doc build for sphinx --- examples/store-and-load-results.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/examples/store-and-load-results.py b/examples/store-and-load-results.py index e2ae4e6d4..655fb208e 100644 --- a/examples/store-and-load-results.py +++ b/examples/store-and-load-results.py @@ -31,10 +31,6 @@ import os import sys -# The followings are hacks to allow sphinx-gallery to run the example. -sys.path.insert(0, os.getcwd()) -main_dir = os.path.basename(sys.modules['__main__'].__file__) -IS_RUN_WITH_SPHINX_GALLERY = main_dir != os.getcwd() ############################################################################# # Simple example @@ -46,14 +42,10 @@ from skopt import gp_minimize noise_level = 0.1 -if IS_RUN_WITH_SPHINX_GALLERY: - # When this example is run with sphinx gallery, it breaks the pickling - # capacity for multiprocessing backend so we have to modify the way we - # define our functions. This has nothing to do with the example. 
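# (Context for this deletion: sphinx-gallery versions before 0.6 executed
#  examples in a way that broke pickling for the multiprocessing backend,
#  which is what the workaround above papered over; with the bump to
#  sphinx-gallery>=0.6 in doc/requirements.txt, a plain module-level
#  obj_fun definition is sufficient.)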
- from utils import obj_fun -else: - def obj_fun(x, noise_level=noise_level): - return np.sin(5 * x[0]) * (1 - np.tanh(x[0] ** 2)) + np.random.randn() * noise_level + +def obj_fun(x, noise_level=noise_level): + return np.sin(5 * x[0]) * (1 - np.tanh(x[0] ** 2)) + np.random.randn() \ + * noise_level res = gp_minimize(obj_fun, # the function to minimize [(-2.0, 2.0)], # the bounds on each dimension of x From 84ddc49183a5d3e40e2c37957b577d0c1d4a0ea9 Mon Sep 17 00:00:00 2001 From: Holger Nahrstaedt Date: Thu, 3 Sep 2020 16:47:31 +0200 Subject: [PATCH 225/265] Prepare next development version --- doc/whats_new/v0.8.rst | 10 +++++++++- doc/whats_new/v0.9.rst | 9 +++++++++ skopt/__init__.py | 2 +- 3 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 doc/whats_new/v0.9.rst diff --git a/doc/whats_new/v0.8.rst b/doc/whats_new/v0.8.rst index 063662957..ea8bd641f 100644 --- a/doc/whats_new/v0.8.rst +++ b/doc/whats_new/v0.8.rst @@ -6,7 +6,7 @@ Version 0.8.0 ============= -**In Development** +**September 2020** :mod:`skopt.Optimizer` ---------------------- @@ -32,6 +32,14 @@ Version 0.8.0 - |Enhancement| Improve sampler and add grid sampler :pr:`851` by :user:`Holger Nahrstaedt ` +:mod:`skopt.searchcv` +--------------------- +- |Fix| Fix library for scikit-learn >= 0.23. + numpy MaskArray is replaced by numpy.ma.array. + y_normalize=False has been added and initial runs + has been increased. + :pr: `939` by :user:`Lucas Plagwitz ` + :mod:`skopt.space` ------------------ - |Fix| Fix Integer transform and inverse_transform for normalize diff --git a/doc/whats_new/v0.9.rst b/doc/whats_new/v0.9.rst new file mode 100644 index 000000000..e4c613f90 --- /dev/null +++ b/doc/whats_new/v0.9.rst @@ -0,0 +1,9 @@ +.. include:: _contributors.rst + +.. currentmodule:: skopt + +.. _changes_0_8: + +Version 0.9.0 +============= +**In Development** diff --git a/skopt/__init__.py b/skopt/__init__.py index 2531d63a7..914c8f2f3 100644 --- a/skopt/__init__.py +++ b/skopt/__init__.py @@ -29,7 +29,7 @@ # Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer. # 'X.Y.dev0' is the canonical version of 'X.Y.dev' # -__version__ = "0.8.dev0" +__version__ = "0.9.dev0" if __SKOPT_SETUP__: import sys From afb6096b97e10cc3f38ee9f653f592b805487647 Mon Sep 17 00:00:00 2001 From: Holger Nahrstaedt Date: Thu, 3 Sep 2020 20:08:55 +0200 Subject: [PATCH 226/265] Revert change to plot_histogram --- skopt/plots.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skopt/plots.py b/skopt/plots.py index 0b803437e..5a14b01bf 100644 --- a/skopt/plots.py +++ b/skopt/plots.py @@ -1243,7 +1243,7 @@ def plot_histogram(result, dimension_identifier, bins=20, rotate_labels=0, # in which case the histogram can be plotted more easily. if dimension.prior == 'log-uniform': # Map the number of bins to a log-space for the dimension bounds. - bins_mapped = np.logspace(np.log10(dimension.bounds), bins) + bins_mapped = np.logspace(*np.log10(dimension.bounds), bins) else: # Use the original number of bins. 
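# (Why the * matters in the fixed branch above: np.log10(dimension.bounds)
#  evaluates to the pair [log10(low), log10(high)], and unpacking it supplies
#  logspace's start and stop arguments; without the star the whole pair was
#  passed as start and `bins` was silently consumed as stop.)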
bins_mapped = bins From 72af1652548a10f75d9c630b2146933640e1d2d2 Mon Sep 17 00:00:00 2001 From: Holger Nahrstaedt Date: Fri, 4 Sep 2020 09:37:01 +0200 Subject: [PATCH 227/265] Fixes GaussianProcessRegressor for sklearn >= 0.23 Fixes changes introduced by https://github.com/scikit-learn/scikit-learn/pull/15782/files --- doc/whats_new/v0.8.rst | 7 +++++++ skopt/learning/gaussian_process/gpr.py | 18 ++++++++++++++---- skopt/tests/test_acquisition.py | 14 ++++++++++++++ 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/doc/whats_new/v0.8.rst b/doc/whats_new/v0.8.rst index ea8bd641f..b2ec63e3c 100644 --- a/doc/whats_new/v0.8.rst +++ b/doc/whats_new/v0.8.rst @@ -4,6 +4,13 @@ .. _changes_0_8: +Version 0.8.1 +============= +**September 2020** + +- |Fix| GaussianProcessRegressor on sklearn 0.23 normalizes the + variance to 1, which needs to be reverted on predict. + Version 0.8.0 ============= **September 2020** diff --git a/skopt/learning/gaussian_process/gpr.py b/skopt/learning/gaussian_process/gpr.py index 331d19ad7..9af582f9a 100644 --- a/skopt/learning/gaussian_process/gpr.py +++ b/skopt/learning/gaussian_process/gpr.py @@ -224,10 +224,15 @@ def fit(self, X, y): self.K_inv_ = L_inv.dot(L_inv.T) # Fix deprecation warning #462 - if int(sklearn.__version__[2:4]) >= 19: + if int(sklearn.__version__[2:4]) >= 23: + self.y_train_std_ = self._y_train_std self.y_train_mean_ = self._y_train_mean + elif int(sklearn.__version__[2:4]) >= 19: + self.y_train_mean_ = self._y_train_mean + self.y_train_std_ = 1 else: self.y_train_mean_ = self.y_train_mean + self.y_train_std_ = 1 return self @@ -309,11 +314,14 @@ def predict(self, X, return_std=False, return_cov=False, else: # Predict based on GP posterior K_trans = self.kernel_(X, self.X_train_) y_mean = K_trans.dot(self.alpha_) # Line 4 (y_mean = f_star) - y_mean = self.y_train_mean_ + y_mean # undo normal. + # undo normalisation + y_mean = self.y_train_std_ * y_mean + self.y_train_mean_ if return_cov: v = cho_solve((self.L_, True), K_trans.T) # Line 5 y_cov = self.kernel_(X) - K_trans.dot(v) # Line 6 + # undo normalisation + y_cov = y_cov * self.y_train_std_**2 return y_mean, y_cov elif return_std: @@ -330,17 +338,19 @@ def predict(self, X, return_std=False, return_cov=False, warnings.warn("Predicted variances smaller than 0.
" "Setting those variances to 0.") y_var[y_var_negative] = 0.0 + # undo normalisation + y_var = y_var * self.y_train_std_**2 y_std = np.sqrt(y_var) if return_mean_grad: grad = self.kernel_.gradient_x(X[0], self.X_train_) - grad_mean = np.dot(grad.T, self.alpha_) + grad_mean = np.dot(grad.T, self.alpha_) * self.y_train_std_ if return_std_grad: grad_std = np.zeros(X.shape[1]) if not np.allclose(y_std, grad_std): grad_std = -np.dot(K_trans, - np.dot(K_inv, grad))[0] / y_std + np.dot(K_inv, grad))[0] / y_std * self.y_train_std_**2 return y_mean, y_std, grad_mean, grad_std if return_std: diff --git a/skopt/tests/test_acquisition.py b/skopt/tests/test_acquisition.py index 13117dcbe..a50b9116b 100644 --- a/skopt/tests/test_acquisition.py +++ b/skopt/tests/test_acquisition.py @@ -119,6 +119,19 @@ def test_acquisition_gradient(): check_gradient_correctness(X_new, gpr, acq_func, np.max(y)) +@pytest.mark.fast_test +def test_acquisition_gradient_cookbook(): + rng = np.random.RandomState(0) + X = rng.randn(20, 5) + y = rng.randn(20) + X_new = rng.randn(5) + gpr = cook_estimator("GP", Space(((-5.0, 5.0),)), random_state=0) + gpr.fit(X, y) + + for acq_func in ["LCB", "PI", "EI"]: + check_gradient_correctness(X_new, gpr, acq_func, np.max(y)) + + @pytest.mark.fast_test @pytest.mark.parametrize("acq_func", ["EIps", "PIps"]) def test_acquisition_per_second(acq_func): @@ -160,4 +173,5 @@ def test_acquisition_per_second_gradient(acq_func): gpr = cook_estimator("GP", Space(((-5.0, 5.0),)), random_state=0) mor = MultiOutputRegressor(gpr) mor.fit(X, y) + check_gradient_correctness(X_new, mor, acq_func, 1.5) From faa47ad36c5d2ce6af112e7af2666c5cf48322c6 Mon Sep 17 00:00:00 2001 From: Holger Nahrstaedt Date: Fri, 4 Sep 2020 09:40:17 +0200 Subject: [PATCH 228/265] Fix linting --- skopt/learning/gaussian_process/gpr.py | 9 ++++++--- skopt/tests/test_acquisition.py | 1 - 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/skopt/learning/gaussian_process/gpr.py b/skopt/learning/gaussian_process/gpr.py index 9af582f9a..6b13bb686 100644 --- a/skopt/learning/gaussian_process/gpr.py +++ b/skopt/learning/gaussian_process/gpr.py @@ -344,13 +344,16 @@ def predict(self, X, return_std=False, return_cov=False, if return_mean_grad: grad = self.kernel_.gradient_x(X[0], self.X_train_) - grad_mean = np.dot(grad.T, self.alpha_) * self.y_train_std_ - + grad_mean = np.dot(grad.T, self.alpha_) + # undo normalisation + grad_mean = grad_mean * self.y_train_std_ if return_std_grad: grad_std = np.zeros(X.shape[1]) if not np.allclose(y_std, grad_std): grad_std = -np.dot(K_trans, - np.dot(K_inv, grad))[0] / y_std * self.y_train_std_**2 + np.dot(K_inv, grad))[0] / y_std + # undo normalisation + grad_std = grad_std * self.y_train_std_**2 return y_mean, y_std, grad_mean, grad_std if return_std: diff --git a/skopt/tests/test_acquisition.py b/skopt/tests/test_acquisition.py index a50b9116b..a75d20550 100644 --- a/skopt/tests/test_acquisition.py +++ b/skopt/tests/test_acquisition.py @@ -173,5 +173,4 @@ def test_acquisition_per_second_gradient(acq_func): gpr = cook_estimator("GP", Space(((-5.0, 5.0),)), random_state=0) mor = MultiOutputRegressor(gpr) mor.fit(X, y) - check_gradient_correctness(X_new, mor, acq_func, 1.5) From b019bd36b9432a830c39bd0f5a6c21922b2242cf Mon Sep 17 00:00:00 2001 From: Holger Nahrstaedt Date: Fri, 4 Sep 2020 09:46:22 +0200 Subject: [PATCH 229/265] Fix wrong label --- doc/whats_new/v0.9.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v0.9.rst b/doc/whats_new/v0.9.rst index 
e4c613f90..42bec8647 100644 --- a/doc/whats_new/v0.9.rst +++ b/doc/whats_new/v0.9.rst @@ -2,7 +2,7 @@ .. currentmodule:: skopt -.. _changes_0_8: +.. _changes_0_9: Version 0.9.0 ============= From ff574d1d5be1076549b5111e373abd70cbe3ad05 Mon Sep 17 00:00:00 2001 From: Holger Nahrstaedt Date: Fri, 4 Sep 2020 09:59:02 +0200 Subject: [PATCH 230/265] Add missing ref to 0.9 --- doc/templates/index.html | 2 ++ doc/whats_new.rst | 1 + doc/whats_new/v0.8.rst | 4 +++- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/doc/templates/index.html b/doc/templates/index.html index b3f0736fa..7aff01909 100644 --- a/doc/templates/index.html +++ b/doc/templates/index.html @@ -109,6 +109,8 @@

    News

  • On-going development: What's new (Changelog)
+  • Sep 2020. scikit-optimize 0.8.1 (Changelog).
  • Sep 2020. scikit-optimize 0.8 (Changelog).
  • Feb 2020. scikit-optimize 0.7.2 (Changelog).
  • Feb 2020. scikit-optimize 0.7.1 (Changelog).
  • Jan 2020. scikit-optimize 0.7 (Changelog). diff --git a/doc/whats_new.rst b/doc/whats_new.rst index b1e166f8e..15dd110f7 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -8,6 +8,7 @@ Release notes for all scikit-optimize releases are linked in this this page. .. toctree:: :maxdepth: 1 + Version 0.9 Version 0.8 Version 0.7 Version 0.6 diff --git a/doc/whats_new/v0.8.rst b/doc/whats_new/v0.8.rst index b2ec63e3c..822ff1cbb 100644 --- a/doc/whats_new/v0.8.rst +++ b/doc/whats_new/v0.8.rst @@ -2,7 +2,7 @@ .. currentmodule:: skopt -.. _changes_0_8: +.. _changes_0_8_1: Version 0.8.1 ============= @@ -11,6 +11,8 @@ Version 0.8.1 - |Fix| GaussianProcessRegressor on sklearn 0.23 normalizes the variance to 1, which needs to reverted on predict. +.. _changes_0_8: + Version 0.8.0 ============= **September 2020** From 16ef270440f67bbeb78eaa114c4a6ce2f2bd6e69 Mon Sep 17 00:00:00 2001 From: Pamphile ROY Date: Mon, 28 Sep 2020 12:26:31 +0200 Subject: [PATCH 231/265] Refactor skip in Sobol, add warnings and improve doc --- skopt/sampler/sobol.py | 214 ++++++++++++++++++++++-------------- skopt/tests/test_sampler.py | 17 +-- 2 files changed, 139 insertions(+), 92 deletions(-) diff --git a/skopt/sampler/sobol.py b/skopt/sampler/sobol.py index d56110a68..dbb6badaa 100644 --- a/skopt/sampler/sobol.py +++ b/skopt/sampler/sobol.py @@ -16,6 +16,9 @@ """ from __future__ import division + +import warnings + import numpy as np from scipy.stats import norm from .base import InitialPointGenerator @@ -24,56 +27,81 @@ class Sobol(InitialPointGenerator): - """Generates a new quasirandom Sobol vector with each call. - - The routine adapts the ideas of Antonov and Saleev. + """Generates a new quasirandom Sobol' vector with each call. Parameters ---------- - min_skip : int - minimum skipped seed number. When `min_skip != max_skip` - a random number is picked. - max_skip : int - maximum skipped seed number. When `min_skip != max_skip` - a random number is picked. + skip : int + Skipped seed number. randomize : bool, default=False - When set to True, random shift is applied + When set to True, random shift is applied. + + Notes + ----- + Sobol' sequences [1]_ provide :math:`n=2^m` low discrepancy points in + :math:`[0,1)^{dim}`. Scrambling them makes them suitable for singular + integrands, provides a means of error estimation, and can improve their + rate of convergence. + + There are many versions of Sobol' sequences depending on their + 'direction numbers'. Here, the maximum number of dimension is 40. + + The routine adapts the ideas of Antonov and Saleev [2]_. + + .. warning:: + + Sobol' sequences are a quadrature rule and they lose their balance + properties if one uses a sample size that is not a power of 2, or skips + the first point, or thins the sequence [5]_. + + If :math:`n=2^m` points are not enough then one should take :math:`2^M` + points for :math:`M>m`. When scrambling, the number R of independent + replicates does not have to be a power of 2. + + Sobol' sequences are generated to some number :math:`B` of bits. Then + after :math:`2^B` points have been generated, the sequence will repeat. + Currently :math:`B=30`. References ---------- - Antonov, Saleev, - USSR Computational Mathematics and Mathematical Physics, - Volume 19, 1980, pages 252 - 256. - - Paul Bratley, Bennett Fox, - Algorithm 659: - Implementing Sobol's Quasirandom Sequence Generator, - ACM Transactions on Mathematical Software, - Volume 14, Number 1, pages 88-100, 1988. 
- - Bennett Fox, - Algorithm 647: - Implementation and Relative Efficiency of Quasirandom - Sequence Generators, - ACM Transactions on Mathematical Software, - Volume 12, Number 4, pages 362-376, 1986. - - Ilya Sobol, - USSR Computational Mathematics and Mathematical Physics, - Volume 16, pages 236-242, 1977. - - Ilya Sobol, Levitan, - The Production of Points Uniformly Distributed in a Multidimensional - Cube (in Russian), - Preprint IPM Akad. Nauk SSSR, - Number 40, Moscow 1976. - """ - def __init__(self, min_skip=0, max_skip=1000, randomize=False): + .. [1] I. M. Sobol. The distribution of points in a cube and the accurate + evaluation of integrals. Zh. Vychisl. Mat. i Mat. Phys., 7:784-802, + 1967. + + .. [2] Antonov, Saleev, + USSR Computational Mathematics and Mathematical Physics, + Volume 19, 1980, pages 252 - 256. + + .. [3] Paul Bratley, Bennett Fox, + Algorithm 659: + Implementing Sobol's Quasirandom Sequence Generator, + ACM Transactions on Mathematical Software, + Volume 14, Number 1, pages 88-100, 1988. - self.min_skip = min_skip - self.max_skip = max_skip + .. [4] Bennett Fox, + Algorithm 647: + Implementation and Relative Efficiency of Quasirandom + Sequence Generators, + + .. [5] Art B. Owen. On dropping the first Sobol' point. arXiv 2008.08051, + 2020. + + """ + def __init__(self, skip=0, randomize=True): + + if not (skip & (skip - 1) == 0): + raise ValueError("The balance properties of Sobol' points require" + " skipping a power of 2.") + if skip != 0: + warnings.warn(f"{skip} points have been skipped: " + f"{skip} points can be generated before the " + f"sequence repeats.") + self.skip = skip + + self.num_generated = 0 self.randomize = randomize + self.dim_max = 40 self.log_max = 30 self.atmost = 2 ** self.log_max - 1 @@ -84,7 +112,6 @@ def __init__(self, min_skip=0, max_skip=1000, randomize=False): self.seed_save = -1 self.v = np.zeros((self.dim_max, self.log_max)) self.dim_num_save = -1 - self.initialized = 1 def init(self, dim_num): self.dim_num_save = dim_num @@ -139,11 +166,10 @@ def init(self, dim_num): # Check parameters. if dim_num < 1 or self.dim_max < dim_num: - print('I4_SOBOL - Fatal error!') - print(' The spatial dimension DIM_NUM should satisfy:') - print(' 1 <= DIM_NUM <= %d' % self.dim_max) - print(' But this input value is DIM_NUM = %d' % dim_num) - return + raise ValueError(f'I4_SOBOL - Fatal error!\n' + f' The spatial dimension DIM_NUM should satisfy:\n' + f' 1 <= DIM_NUM <= {self.dim_max}\n' + f' But this input value is DIM_NUM = {dim_num}') # Initialize the remaining rows of V. for i in range(2, dim_num + 1): @@ -188,7 +214,7 @@ def init(self, dim_num): self.lastq = np.zeros(dim_num) def generate(self, dimensions, n_samples, random_state=None): - """Creates samples from Sobol set. + """Creates samples from Sobol' set. Parameters ---------- @@ -204,57 +230,74 @@ def generate(self, dimensions, n_samples, random_state=None): - an instance of a `Dimension` object (`Real`, `Integer` or `Categorical`). n_samples : int - The order of the Sobol sequence. Defines the number of samples. + The order of the Sobol' sequence. Defines the number of samples. random_state : int, RandomState instance, or None (default) Set random state to something other than None for reproducible results. Returns ------- - np.array, shape=(n_dim, n_samples) - Sobol set + sample : array_like (n_samples, dim) + Sobol' set. 
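        A minimal usage sketch (the expected values below follow from the
        updated ``test_generate`` later in this patch):

            from skopt.sampler import Sobol
            sobol = Sobol(randomize=False)
            sobol.generate([(0., 1.)], 4)
            # -> [[0.0], [0.5], [0.75], [0.25]]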
+ """ + total_n_samples = self.num_generated + n_samples + if not (total_n_samples & (total_n_samples - 1) == 0): + warnings.warn("The balance properties of Sobol' points require " + "n to be a power of 2. {0} points have been " + "previously generated, then: n={0}+{1}={2}. " + .format(self.num_generated, n_samples, + total_n_samples)) + if self.skip != 0 and total_n_samples > self.skip: + raise ValueError(f"{self.skip} points have been skipped: generating " + f"{n_samples} more points would cause the " + f"sequence to repeat.") + rng = check_random_state(random_state) space = Space(dimensions) n_dim = space.n_dims transformer = space.get_transformer() space.set_transformer("normalize") r = np.full((n_samples, n_dim), np.nan) - if self.min_skip == self.max_skip: - seed = self.min_skip - else: - seed = rng.randint(self.min_skip, self.max_skip) + + seed = self.skip for j in range(n_samples): r[j, 0:n_dim], seed = self._sobol(n_dim, seed) + if self.randomize: - r = space.inverse_transform(_random_shift(r, rng)) + r = _random_shift(r, rng) + r = space.inverse_transform(r) space.set_transformer(transformer) + + self.num_generated += n_samples + return r def _sobol(self, dim_num, seed): - """Generates a new quasirandom Sobol vector with each call. + """Generates a new quasirandom Sobol' vector with each call. Parameters ---------- dim_num : int - number of spatial dimensions. + Number of spatial dimensions. `dim_num` must satisfy 1 <= DIM_NUM <= 40. seed : int - the "seed" for the sequence. + the `seed` for the sequence. This is essentially the index in the sequence of the quasirandom - value to be generated. On output, SEED has been set to the - appropriate next value, usually simply SEED+1. - If SEED is less than 0 on input, it is treated as though it were 0. + value to be generated. On output, `seed` has been set to the + appropriate next value, usually simply `seed`+1. + If `seed` is less than 0 on input, it is treated as though it were 0. An input value of 0 requests the first (0-th) element of the sequence. Returns ------- - the next quasirandom vector. - """ + vector, seed : np.array (n_dim,), int + The next quasirandom vector and the seed of its next vector. + """ # Things to do only if the dimension changed. if dim_num != self.dim_num_save: self.init(dim_num) @@ -300,11 +343,10 @@ def _sobol(self, dim_num, seed): # Check that the user is not calling too many times! if self.maxcol < pos_lo0: - print('I4_SOBOL - Fatal error!') - print(' Too many calls!') - print(' MAXCOL = %d\n' % self.maxcol) - print(' L = %d\n' % pos_lo0) - return + raise ValueError(f'I4_SOBOL - Fatal error!\n' + f' Too many calls!\n' + f' MAXCOL = {self.maxcol}\n' + f' L = {pos_lo0}\n') # Calculate the new components of QUASI. quasi = np.zeros(dim_num) @@ -320,13 +362,13 @@ def _sobol(self, dim_num, seed): def _bit_hi1(n): - """ - Returns the position of the high 1 bit base 2 in an integer. + """Returns the position of the high 1 bit base 2 in an integer. Parameters ---------- n : int - input, should be positive + Input, should be positive. + """ bin_repr = np.binary_repr(n) most_left_one = bin_repr.find('1') @@ -337,13 +379,12 @@ def _bit_hi1(n): def _bit_lo0(n): - """ - Returns the position of the low 0 bit base 2 in an integer. + """Returns the position of the low 0 bit base 2 in an integer. Parameters ---------- n : int - input, should be positive + Input, should be positive. 
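        For example, assuming the 1-indexed bit positions of the classic
        i4_bit routines this implementation follows: 12 is 1100 in binary,
        its lowest zero bit sits in position 1, so _bit_lo0(12) returns 1.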
""" bin_repr = np.binary_repr(n) @@ -354,27 +395,30 @@ def _bit_lo0(n): def _random_shift(dm, random_state=None): - """Random shifting of a vector - Randomization of the quasi-MC samples can be achieved - in the easiest manner by - random shift (or the Cranley-Patterson rotation). - Refereences + """Random shifting of a vector. + + Randomization of the quasi-MC samples can be achieved in the easiest manner + by random shift (or the Cranley-Patterson rotation). + + References ----------- - C. Lemieux, "Monte Carlo and Quasi-Monte Carlo Sampling," Springer - Series in Statistics 692, Springer Science+Business Media, New York, - 2009 + .. [1] C. Lemieux, "Monte Carlo and Quasi-Monte Carlo Sampling," Springer + Series in Statistics 692, Springer Science+Business Media, New York, + 2009 Parameters ---------- - dm : array, shape(n,d) - input matrix + dm : array, shape(n, d) + Input matrix. random_state : int, RandomState instance, or None (default) Set random state to something other than None for reproducible results. Returns ------- - Randomized Sobol' design matrix + dm : array, shape(n, d) + Randomized Sobol' design matrix. + """ rng = check_random_state(random_state) # Generate random shift matrix from uniform distribution diff --git a/skopt/tests/test_sampler.py b/skopt/tests/test_sampler.py index e8cafb143..e885956d4 100644 --- a/skopt/tests/test_sampler.py +++ b/skopt/tests/test_sampler.py @@ -123,6 +123,8 @@ def test_bit(): @pytest.mark.fast_test def test_sobol(): sobol = Sobol() + x, seed = sobol._sobol(3, 0) + assert_array_equal(x, [0., 0., 0.]) x, seed = sobol._sobol(3, 1) assert_array_equal(x, [0.5, 0.5, 0.5]) x, seed = sobol._sobol(3, 2) @@ -139,15 +141,16 @@ def test_sobol(): @pytest.mark.fast_test def test_generate(): - sobol = Sobol(min_skip=1, max_skip=1) - x = sobol.generate([(0., 1.), ] * 3, 3) + sobol = Sobol(randomize=False) + x = sobol.generate([(0., 1.), ] * 3, 4) x = np.array(x) - assert_array_equal(x[0, :], [0.5, 0.5, 0.5]) - assert_array_equal(x[1, :], [0.75, 0.25, 0.75]) - assert_array_equal(x[2, :], [0.25, 0.75, 0.25]) + assert_array_equal(x[0, :], [0., 0., 0.]) + assert_array_equal(x[1, :], [0.5, 0.5, 0.5]) + assert_array_equal(x[2, :], [0.75, 0.25, 0.75]) + assert_array_equal(x[3, :], [0.25, 0.75, 0.25]) - sobol.set_params(max_skip=2) - assert sobol.max_skip == 2 + sobol.set_params(skip=2) + assert sobol.skip == 2 assert isinstance(sobol, InitialPointGenerator) From bebab292de53cac6d627d1d9ff4efe0ecb8cb3a2 Mon Sep 17 00:00:00 2001 From: Pamphile ROY Date: Mon, 28 Sep 2020 13:31:14 +0200 Subject: [PATCH 232/265] Fix Sobol' typos in usage --- examples/sampler/initial-sampling-method-integer.py | 10 +++++----- examples/sampler/initial-sampling-method.py | 10 +++++----- examples/sampler/sampling_comparison.py | 4 ++-- skopt/optimizer/base.py | 2 +- skopt/optimizer/dummy.py | 2 +- skopt/optimizer/forest.py | 2 +- skopt/optimizer/gbrt.py | 2 +- skopt/optimizer/gp.py | 2 +- skopt/optimizer/optimizer.py | 2 +- 9 files changed, 18 insertions(+), 18 deletions(-) diff --git a/examples/sampler/initial-sampling-method-integer.py b/examples/sampler/initial-sampling-method-integer.py index c66a9508b..6d775a087 100644 --- a/examples/sampler/initial-sampling-method-integer.py +++ b/examples/sampler/initial-sampling-method-integer.py @@ -13,7 +13,7 @@ (10 by default). The downside to this is that there is no guarantee that these samples are spread out evenly across all the dimensions. 
-Sampling methods as Latin hypercube, Sobol, Halton and Hammersly +Sampling methods as Latin hypercube, Sobol', Halton and Hammersly take advantage of the fact that we know beforehand how many random points we want to sample. Then these points can be "spread out" in such a way that each dimension is explored. @@ -64,15 +64,15 @@ def plot_searchspace(x, title): x_label.append("random") ############################################################################# -# Sobol -# ----- +# Sobol' +# ------ sobol = Sobol() x = sobol.generate(space.dimensions, n_samples) -plot_searchspace(x, 'Sobol') +plot_searchspace(x, "Sobol'") print("empty fields: %d" % (36 - np.size(np.unique(x, axis=0), 0))) pdist_data.append(pdist(x).flatten()) -x_label.append("sobol") +x_label.append("sobol'") ############################################################################# # Classic latin hypercube sampling diff --git a/examples/sampler/initial-sampling-method.py b/examples/sampler/initial-sampling-method.py index ad0e7f903..ba734742f 100644 --- a/examples/sampler/initial-sampling-method.py +++ b/examples/sampler/initial-sampling-method.py @@ -14,7 +14,7 @@ (10 by default). The downside to this is that there is no guarantee that these samples are spread out evenly across all the dimensions. -Sampling methods as Latin hypercube, Sobol, Halton and Hammersly +Sampling methods as Latin hypercube, Sobol', Halton and Hammersly take advantage of the fact that we know beforehand how many random points we want to sample. Then these points can be "spread out" in such a way that each dimension is explored. @@ -64,14 +64,14 @@ def plot_searchspace(x, title): x_label.append("random") ############################################################################# -# Sobol -# ----- +# Sobol' +# ------ sobol = Sobol() x = sobol.generate(space.dimensions, n_samples) -plot_searchspace(x, 'Sobol') +plot_searchspace(x, "Sobol'") pdist_data.append(pdist(x).flatten()) -x_label.append("sobol") +x_label.append("sobol'") ############################################################################# # Classic Latin hypercube sampling diff --git a/examples/sampler/sampling_comparison.py b/examples/sampler/sampling_comparison.py index 819f5e06d..a52d92c0f 100644 --- a/examples/sampler/sampling_comparison.py +++ b/examples/sampler/sampling_comparison.py @@ -14,7 +14,7 @@ * Halton sequence, * Hammersly sequence, -* Sobol sequence and +* Sobol' sequence and * Latin hypercube sampling as initial points. The purely random point generation is used as @@ -153,7 +153,7 @@ def run_measure(initial_point_generator, n_initial_points=10): plot = plot_convergence([("random", dummy_res), ("lhs", lhs_res), ("lhs_maximin", lhs2_res), - ("sobol", sobol_res), + ("sobol'", sobol_res), ("halton", halton_res), ("hammersly", hammersly_res), ("grid", grid_res)], diff --git a/skopt/optimizer/base.py b/skopt/optimizer/base.py index 8cde60774..ac26bd66e 100644 --- a/skopt/optimizer/base.py +++ b/skopt/optimizer/base.py @@ -84,7 +84,7 @@ def base_minimize(func, dimensions, base_estimator, Sets a initial points generator. 
Can be either - `"random"` for uniform random numbers, - - `"sobol"` for a Sobol sequence, + - `"sobol"` for a Sobol' sequence, - `"halton"` for a Halton sequence, - `"hammersly"` for a Hammersly sequence, - `"lhs"` for a latin hypercube sequence, diff --git a/skopt/optimizer/dummy.py b/skopt/optimizer/dummy.py index 71902ee97..c372c243b 100644 --- a/skopt/optimizer/dummy.py +++ b/skopt/optimizer/dummy.py @@ -40,7 +40,7 @@ def dummy_minimize(func, dimensions, n_calls=100, Sets a initial points generator. Can be either - `"random"` for uniform random numbers, - - `"sobol"` for a Sobol sequence, + - `"sobol"` for a Sobol' sequence, - `"halton"` for a Halton sequence, - `"hammersly"` for a Hammersly sequence, - `"lhs"` for a latin hypercube sequence, diff --git a/skopt/optimizer/forest.py b/skopt/optimizer/forest.py index 9ceada655..455a04cf0 100644 --- a/skopt/optimizer/forest.py +++ b/skopt/optimizer/forest.py @@ -92,7 +92,7 @@ def forest_minimize(func, dimensions, base_estimator="ET", n_calls=100, Sets a initial points generator. Can be either - `"random"` for uniform random numbers, - - `"sobol"` for a Sobol sequence, + - `"sobol"` for a Sobol' sequence, - `"halton"` for a Halton sequence, - `"hammersly"` for a Hammersly sequence, - `"lhs"` for a latin hypercube sequence, diff --git a/skopt/optimizer/gbrt.py b/skopt/optimizer/gbrt.py index bd11aa854..de43d6fcc 100644 --- a/skopt/optimizer/gbrt.py +++ b/skopt/optimizer/gbrt.py @@ -79,7 +79,7 @@ def gbrt_minimize(func, dimensions, base_estimator=None, Sets a initial points generator. Can be either - `"random"` for uniform random numbers, - - `"sobol"` for a Sobol sequence, + - `"sobol"` for a Sobol' sequence, - `"halton"` for a Halton sequence, - `"hammersly"` for a Hammersly sequence, - `"lhs"` for a latin hypercube sequence, diff --git a/skopt/optimizer/gp.py b/skopt/optimizer/gp.py index 83aebb731..67935a8e8 100644 --- a/skopt/optimizer/gp.py +++ b/skopt/optimizer/gp.py @@ -100,7 +100,7 @@ def gp_minimize(func, dimensions, base_estimator=None, Sets a initial points generator. Can be either - `"random"` for uniform random numbers, - - `"sobol"` for a Sobol sequence, + - `"sobol"` for a Sobol' sequence, - `"halton"` for a Halton sequence, - `"hammersly"` for a Hammersly sequence, - `"lhs"` for a latin hypercube sequence, diff --git a/skopt/optimizer/optimizer.py b/skopt/optimizer/optimizer.py index a2c11e322..8aa32169d 100644 --- a/skopt/optimizer/optimizer.py +++ b/skopt/optimizer/optimizer.py @@ -76,7 +76,7 @@ class Optimizer(object): Sets a initial points generator. Can be either - `"random"` for uniform random numbers, - - `"sobol"` for a Sobol sequence, + - `"sobol"` for a Sobol' sequence, - `"halton"` for a Halton sequence, - `"hammersly"` for a Hammersly sequence, - `"lhs"` for a latin hypercube sequence, From 2cde042dbd4527bd5fbce437ebf1d8beeac4d9b7 Mon Sep 17 00:00:00 2001 From: Pamphile ROY Date: Mon, 28 Sep 2020 13:38:49 +0200 Subject: [PATCH 233/265] Fix linting errors and CI versions not matching --- .travis.yml | 8 ++++---- skopt/sampler/sobol.py | 11 ++++++----- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/.travis.yml b/.travis.yml index 37be15dd7..d94a52c75 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,13 +20,13 @@ matrix: # installed from their CI wheels in a virtualenv with the Python # interpreter provided by travis. 
- name: "Python 3.6 - scikit 0.20.4" - python: "3.7" - env: DISTRIB="conda" PYTHON_VERSION="3.5" + python: "3.6" + env: DISTRIB="conda" PYTHON_VERSION="3.6" NUMPY_VERSION="1.13.3" SCIPY_VERSION="0.19.1" PYAML_VERSION="16.9.0" SCIKIT_LEARN_VERSION="0.20.4" MATPLOTLIB_VERSION="2.0.0" COVERAGE="false" JOBLIB_VERSION="0.11" - name: "Python 3.6 - scikit 0.21.3" - python: "3.7" + python: "3.6" env: DISTRIB="conda" PYTHON_VERSION="3.6" NUMPY_VERSION="1.14.0" SCIPY_VERSION="1.0.0" PYAML_VERSION="16.12.0" SCIKIT_LEARN_VERSION="0.21.3" MATPLOTLIB_VERSION="2.0.0" COVERAGE="false" @@ -38,7 +38,7 @@ matrix: SCIKIT_LEARN_VERSION="0.22.1" MATPLOTLIB_VERSION="*" COVERAGE="true" JOBLIB_VERSION="0.13" - name: "Python 3.8 - scikit 0.23.2" - python: "3.7" + python: "3.8" env: DISTRIB="conda" PYTHON_VERSION="3.8.1" COVERAGE="false" NUMPY_VERSION="1.19.1" SCIPY_VERSION="1.5.2" PYAML_VERSION="20.4.0" SCIKIT_LEARN_VERSION="0.23.2" JOBLIB_VERSION="0.16.0" diff --git a/skopt/sampler/sobol.py b/skopt/sampler/sobol.py index dbb6badaa..a148003c4 100644 --- a/skopt/sampler/sobol.py +++ b/skopt/sampler/sobol.py @@ -20,7 +20,6 @@ import warnings import numpy as np -from scipy.stats import norm from .base import InitialPointGenerator from ..space import Space from sklearn.utils import check_random_state @@ -167,9 +166,10 @@ def init(self, dim_num): # Check parameters. if dim_num < 1 or self.dim_max < dim_num: raise ValueError(f'I4_SOBOL - Fatal error!\n' - f' The spatial dimension DIM_NUM should satisfy:\n' - f' 1 <= DIM_NUM <= {self.dim_max}\n' - f' But this input value is DIM_NUM = {dim_num}') + f' The spatial dimension DIM_NUM should ' + f'satisfy:\n' + f' 1 <= DIM_NUM <= {self.dim_max}\n' + f' But this input value is DIM_NUM = {dim_num}') # Initialize the remaining rows of V. for i in range(2, dim_num + 1): @@ -249,7 +249,8 @@ def generate(self, dimensions, n_samples, random_state=None): .format(self.num_generated, n_samples, total_n_samples)) if self.skip != 0 and total_n_samples > self.skip: - raise ValueError(f"{self.skip} points have been skipped: generating " + raise ValueError(f"{self.skip} points have been skipped: " + f"generating " f"{n_samples} more points would cause the " f"sequence to repeat.") From 79ae9a58f8b21829fa53c7403f75dfe77f012115 Mon Sep 17 00:00:00 2001 From: Pamphile ROY Date: Tue, 29 Sep 2020 09:01:23 +0200 Subject: [PATCH 234/265] Fix zero in Halton, fix Van Der Corput and fix tests --- skopt/sampler/halton.py | 24 ++++++++++++++---------- skopt/sampler/hammersly.py | 13 ++++++++----- skopt/tests/test_plots.py | 6 +++--- skopt/tests/test_sampler.py | 31 ++++++++++++++++--------------- 4 files changed, 41 insertions(+), 33 deletions(-) diff --git a/skopt/sampler/halton.py b/skopt/sampler/halton.py index 55b48ffb6..7a3eeb873 100644 --- a/skopt/sampler/halton.py +++ b/skopt/sampler/halton.py @@ -10,6 +10,7 @@ class Halton(InitialPointGenerator): """Creates `Halton` sequence samples. + In statistics, Halton sequences are sequences used to generate points in space for numerical methods such as Monte Carlo simulations. Although these sequences are deterministic, they are of low discrepancy, @@ -23,16 +24,17 @@ class Halton(InitialPointGenerator): Parameters ---------- min_skip : int - minimum skipped seed number. When `min_skip != max_skip` + Minimum skipped seed number. When `min_skip != max_skip` a random number is picked. max_skip : int - maximum skipped seed number. When `min_skip != max_skip` + Maximum skipped seed number. When `min_skip != max_skip` a random number is picked. 
primes : tuple, default=None The (non-)prime base to calculate values along each axis. If empty or None, growing prime values starting from 2 will be used. + """ - def __init__(self, min_skip=-1, max_skip=-1, primes=None): + def __init__(self, min_skip=0, max_skip=0, primes=None): self.primes = primes self.min_skip = min_skip self.max_skip = max_skip @@ -62,7 +64,8 @@ def generate(self, dimensions, n_samples, random_state=None): Returns ------- np.array, shape=(n_dim, n_samples) - Halton set + Halton set. + """ rng = check_random_state(random_state) if self.primes is None: @@ -81,10 +84,11 @@ def generate(self, dimensions, n_samples, random_state=None): primes = primes[:n_dim] assert len(primes) == n_dim, "not enough primes" - if self.min_skip < 0 and self.max_skip < 0: - skip = max(primes) - elif self.min_skip == self.max_skip: + + if self.min_skip == self.max_skip: skip = self.min_skip + elif self.min_skip < 0 and self.max_skip < 0: + skip = max(primes) elif self.min_skip < 0 or self.max_skip < 0: skip = np.max(self.min_skip, self.max_skip) else: @@ -101,8 +105,7 @@ def generate(self, dimensions, n_samples, random_state=None): def _van_der_corput_samples(idx, number_base=2): - """ - Create `Van Der Corput` low discrepancy sequence samples. + """Create `Van Der Corput` low discrepancy sequence samples. A van der Corput sequence is an example of the simplest one-dimensional low-discrepancy sequence over the unit interval; it was first described in @@ -125,10 +128,11 @@ def _van_der_corput_samples(idx, number_base=2): ------- float, numpy.ndarray Van der Corput samples. + """ assert number_base > 1 - idx = np.asarray(idx).flatten() + 1 + idx = np.asarray(idx).flatten() out = np.zeros(len(idx), dtype=float) base = float(number_base) diff --git a/skopt/sampler/hammersly.py b/skopt/sampler/hammersly.py index 9f91c73f8..5de6c6e99 100644 --- a/skopt/sampler/hammersly.py +++ b/skopt/sampler/hammersly.py @@ -11,6 +11,7 @@ class Hammersly(InitialPointGenerator): """Creates `Hammersley` sequence samples. + The Hammersley set is equivalent to the Halton sequence, except for one dimension is replaced with a regular grid. It is not recommended to generate a Hammersley sequence with more than 10 dimension. @@ -25,16 +26,17 @@ class Hammersly(InitialPointGenerator): Parameters ---------- min_skip : int, default=-1 - minimum skipped seed number. When `min_skip != max_skip` and + Minimum skipped seed number. When `min_skip != max_skip` and both are > -1, a random number is picked. max_skip : int, default=-1 - maximum skipped seed number. When `min_skip != max_skip` and + Maximum skipped seed number. When `min_skip != max_skip` and both are > -1, a random number is picked. primes : tuple, default=None The (non-)prime base to calculate values along each axis. If empty, growing prime values starting from 2 will be used. + """ - def __init__(self, min_skip=-1, max_skip=-1, primes=None): + def __init__(self, min_skip=0, max_skip=0, primes=None): self.primes = primes self.min_skip = min_skip self.max_skip = max_skip @@ -65,7 +67,8 @@ def generate(self, dimensions, n_samples, random_state=None): Returns ------- np.array, shape=(n_dim, n_samples) - Hammersley set + Hammersley set. 
+ """ rng = check_random_state(random_state) halton = Halton(min_skip=self.min_skip, max_skip=self.max_skip, @@ -83,7 +86,7 @@ def generate(self, dimensions, n_samples, random_state=None): [(0., 1.), ] * (n_dim - 1), n_samples, random_state=rng)).T - out[n_dim - 1] = np.linspace(0, 1, n_samples + 2)[1:-1] + out[n_dim - 1] = np.linspace(0, 1, n_samples + 1)[:-1] out = space.inverse_transform(out.T) space.set_transformer(transformer) return out diff --git a/skopt/tests/test_plots.py b/skopt/tests/test_plots.py index 28333a42a..f29df8d35 100644 --- a/skopt/tests/test_plots.py +++ b/skopt/tests/test_plots.py @@ -52,14 +52,14 @@ def objective(params): xi, yi = partial_dependence_1D(res.space, res.models[-1], 0, samples, n_points=3) assert_array_almost_equal(xi, xi_) - assert_array_almost_equal(yi, yi_, 1e-3) + assert_array_almost_equal(yi, yi_, 3) xi_ = [0, 1] yi_ = [-0.9241087603770617, -0.9240188905968352] xi, yi = partial_dependence_1D(res.space, res.models[-1], 4, samples, n_points=3) assert_array_almost_equal(xi, xi_) - assert_array_almost_equal(yi, yi_, 1e-3) + assert_array_almost_equal(yi, yi_, 3) xi_ = [0, 1] yi_ = [1., 10.5, 20.] @@ -70,7 +70,7 @@ def objective(params): samples, n_points=3) assert_array_almost_equal(xi, xi_) assert_array_almost_equal(yi, yi_) - assert_array_almost_equal(zi, zi_, 1e-3) + assert_array_almost_equal(zi, zi_, 3) x_min, f_min = expected_minimum_random_sampling(res, random_state=1) x_min2, f_min2 = expected_minimum(res, random_state=1) diff --git a/skopt/tests/test_sampler.py b/skopt/tests/test_sampler.py index e885956d4..49659ba52 100644 --- a/skopt/tests/test_sampler.py +++ b/skopt/tests/test_sampler.py @@ -156,27 +156,31 @@ def test_generate(): @pytest.mark.fast_test def test_van_der_corput(): - x = _van_der_corput_samples(range(11), number_base=10) - y = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.01, 0.11] + x = _van_der_corput_samples(range(12), number_base=10) + y = [0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.01, 0.11] assert_array_equal(x, y) - x = _van_der_corput_samples(range(8), number_base=2) - y = [0.5, 0.25, 0.75, 0.125, 0.625, 0.375, 0.875, 0.0625] + x = _van_der_corput_samples(range(9), number_base=2) + y = [0., 0.5, 0.25, 0.75, 0.125, 0.625, 0.375, 0.875, 0.0625] assert_array_equal(x, y) @pytest.mark.fast_test def test_halton(): h = Halton() - x = h.generate([(0., 1.), ] * 2, 3) - y = np.array([[0.125, 0.625, 0.375], [0.4444, 0.7778, 0.2222]]).T - assert_array_almost_equal(x, y, 1e-3) + x = h.generate([(0., 1.), ], 9) + y = _van_der_corput_samples(range(9), number_base=2) + assert_array_almost_equal(np.array(x).flatten(), y) h = Halton() - x = h.generate([(0., 1.), ] * 2, 4) - y = np.array([[0.125, 0.625, 0.375, 0.875], - [0.4444, 0.7778, 0.2222, 0.5556]]).T - assert_array_almost_equal(x, y, 1e-3) + x = h.generate([(0., 1.), ] * 2, 6) + y = np.array([[0, 0], [1 / 2, 1 / 3], [1 / 4, 2 / 3], [3 / 4, 1 / 9], + [1 / 8, 4 / 9], [5 / 8, 7 / 9]]) + assert_array_almost_equal(x, y) + + h = Halton(min_skip=0, max_skip=3) + x = h.generate([(0., 1.), ] * 2, 4, random_state=12345) + assert_array_almost_equal(x, y[2:]) samples = h.generate([(0., 1.), ] * 2, 200) assert len(samples) == 200 @@ -186,11 +190,8 @@ def test_halton(): @pytest.mark.fast_test def test_hammersly(): h = Hammersly() - x = h.generate([(0., 1.), ] * 2, 3) - y = np.array([[0.75, 0.125, 0.625], [0.25, 0.5, 0.75]]).T - assert_almost_equal(x, y) x = h.generate([(0., 1.), ] * 2, 4) - y = np.array([[0.75, 0.125, 0.625, 0.375], [0.2, 0.4, 0.6, 0.8]]).T + y = np.array([[0, 0], [1 / 
2, 0.25], [1 / 4, 0.5], [3 / 4, 0.75]]) assert_almost_equal(x, y) samples = h.generate([(0., 1.), ] * 2, 200) From cd5ccaf5796ddf315691e32a7e8fe9b58ea66659 Mon Sep 17 00:00:00 2001 From: Pamphile ROY Date: Tue, 29 Sep 2020 09:21:16 +0200 Subject: [PATCH 235/265] Fix tol in some plot tests --- skopt/tests/test_plots.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/skopt/tests/test_plots.py b/skopt/tests/test_plots.py index f29df8d35..3fb57d94c 100644 --- a/skopt/tests/test_plots.py +++ b/skopt/tests/test_plots.py @@ -52,14 +52,14 @@ def objective(params): xi, yi = partial_dependence_1D(res.space, res.models[-1], 0, samples, n_points=3) assert_array_almost_equal(xi, xi_) - assert_array_almost_equal(yi, yi_, 3) + assert_array_almost_equal(yi, yi_, 2) xi_ = [0, 1] yi_ = [-0.9241087603770617, -0.9240188905968352] xi, yi = partial_dependence_1D(res.space, res.models[-1], 4, samples, n_points=3) assert_array_almost_equal(xi, xi_) - assert_array_almost_equal(yi, yi_, 3) + assert_array_almost_equal(yi, yi_, 2) xi_ = [0, 1] yi_ = [1., 10.5, 20.] @@ -70,7 +70,7 @@ def objective(params): samples, n_points=3) assert_array_almost_equal(xi, xi_) assert_array_almost_equal(yi, yi_) - assert_array_almost_equal(zi, zi_, 3) + assert_array_almost_equal(zi, zi_, 2) x_min, f_min = expected_minimum_random_sampling(res, random_state=1) x_min2, f_min2 = expected_minimum(res, random_state=1) From 530da127c0e3d92fc5018115585e73fecced12a5 Mon Sep 17 00:00:00 2001 From: Tim Gates Date: Thu, 31 Dec 2020 23:07:52 +1100 Subject: [PATCH 236/265] docs: fix simple typo, stategies -> strategies (#979) There is a small typo in doc/whats_new/v0.4.rst. Should read `strategies` rather than `stategies`. --- doc/whats_new/v0.4.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v0.4.rst b/doc/whats_new/v0.4.rst index 961cb1f34..44372a512 100644 --- a/doc/whats_new/v0.4.rst +++ b/doc/whats_new/v0.4.rst @@ -8,7 +8,7 @@ New features * Support early stopping of optimization loop. * Benchmarking scripts to evaluate performance of different surrogate models. -* Support for parallel evaluations of the objective function via several constant liar stategies. +* Support for parallel evaluations of the objective function via several constant liar strategies. * BayesSearchCV as a drop in replacement for scikit-learn's GridSearchCV. * New acquisition functions "EIps" and "PIps" that takes into account function compute time. From 0d3173496cee5485c7815b3ba23dfedc276649bb Mon Sep 17 00:00:00 2001 From: Gian-Carlo Pascutto Date: Sat, 3 Apr 2021 11:30:32 +0200 Subject: [PATCH 237/265] Correct documented default is_int for Normalize. Fixes issue #1016. --- skopt/space/transformers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skopt/space/transformers.py b/skopt/space/transformers.py index 44539aafd..a8e2944da 100644 --- a/skopt/space/transformers.py +++ b/skopt/space/transformers.py @@ -230,7 +230,7 @@ class Normalize(Transformer): high : float Higher bound. - is_int : bool, default=True + is_int : bool, default=False Round and cast the return value of `inverse_transform` to integer. Set to `True` when applying this transform to integers. 
""" From d01734e7d11452edcfee5dc12ca684ade7c888d9 Mon Sep 17 00:00:00 2001 From: Guillaume SIMO <45049080+GuillaumeSimo@users.noreply.github.com> Date: Thu, 11 Feb 2021 15:32:58 +0100 Subject: [PATCH 238/265] add ThresholdStopper() linked to #999 --- skopt/callbacks.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/skopt/callbacks.py b/skopt/callbacks.py index 4c31e6c39..9981e9a5b 100644 --- a/skopt/callbacks.py +++ b/skopt/callbacks.py @@ -271,6 +271,21 @@ def _criterion(self, result): return None +class ThresholdStopper(EarlyStopper): + """ + Stop the optimization if the best func_vals is lower than the given threshold + """ + def __init__(self, threshold: float) -> bool: + super(EarlyStopper, self).__init__() + self.threshold = threshold + + def _criterion(self, result): + func_vals = np.sort(result.func_vals) + best = func_vals[0] + # True if best < threshold given by the user + return best <= self.threshold + + class CheckpointSaver(object): """ Save current state after each iteration with :class:`skopt.dump`. From 43930df37bdd68fa2d54e52325460221aea98a70 Mon Sep 17 00:00:00 2001 From: Guillaume SIMO <45049080+GuillaumeSimo@users.noreply.github.com> Date: Thu, 11 Feb 2021 15:36:21 +0100 Subject: [PATCH 239/265] fix max characters by line --- skopt/callbacks.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/skopt/callbacks.py b/skopt/callbacks.py index 9981e9a5b..dee847aa0 100644 --- a/skopt/callbacks.py +++ b/skopt/callbacks.py @@ -273,7 +273,8 @@ def _criterion(self, result): class ThresholdStopper(EarlyStopper): """ - Stop the optimization if the best func_vals is lower than the given threshold + Stop the optimization if the best func_vals is lower than the given + threshold """ def __init__(self, threshold: float) -> bool: super(EarlyStopper, self).__init__() From c4a31249405ccc57dd7d3bfb520082f02def5eb1 Mon Sep 17 00:00:00 2001 From: Guillaume SIMO <45049080+GuillaumeSimo@users.noreply.github.com> Date: Thu, 11 Feb 2021 15:38:44 +0100 Subject: [PATCH 240/265] add test_threshold_stopper() linked to #999 --- skopt/tests/test_callbacks.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/skopt/tests/test_callbacks.py b/skopt/tests/test_callbacks.py index 4ce788bf0..3df24020b 100644 --- a/skopt/tests/test_callbacks.py +++ b/skopt/tests/test_callbacks.py @@ -13,6 +13,7 @@ from skopt.callbacks import DeadlineStopper from skopt.callbacks import CheckpointSaver from skopt.callbacks import HollowIterationsStopper +from skopt.callbacks import ThresholdStopper from skopt.utils import load @@ -35,6 +36,16 @@ def test_deltay_stopper(): assert deltay(Result([0, 1])) is None +@pytest.mark.fast_test +def test_threshold_stopper(): + threshold = ThresholdStopper(3.0) + + Result = namedtuple('Result', ['func_vals']) + + assert not threshold(Result([3.1, 4, 4.6, 100])) + assert threshold(Result([3.0, 3, 2.9, 0, 0.0])) + + @pytest.mark.fast_test def test_deadline_stopper(): deadline = DeadlineStopper(0.0001) From bf9c12355800fe9e272b20a9fcdae150af8a0464 Mon Sep 17 00:00:00 2001 From: Guillaume SIMO <45049080+GuillaumeSimo@users.noreply.github.com> Date: Fri, 12 Feb 2021 09:16:19 +0100 Subject: [PATCH 241/265] ThresholdStopper() docstring Co-authored-by: kernc --- skopt/callbacks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skopt/callbacks.py b/skopt/callbacks.py index dee847aa0..4d2a451da 100644 --- a/skopt/callbacks.py +++ b/skopt/callbacks.py @@ -273,8 +273,8 @@ def _criterion(self, result): class 
ThresholdStopper(EarlyStopper): """ - Stop the optimization if the best func_vals is lower than the given - threshold + Stop the optimization when the objective value is lower + than the given threshold. """ def __init__(self, threshold: float) -> bool: super(EarlyStopper, self).__init__() From c0add5adb0789a2dd95add43395c81f4659ca568 Mon Sep 17 00:00:00 2001 From: Guillaume SIMO <45049080+GuillaumeSimo@users.noreply.github.com> Date: Fri, 12 Feb 2021 09:35:59 +0100 Subject: [PATCH 242/265] reduce ThresholdStopper() complexity --- skopt/callbacks.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/skopt/callbacks.py b/skopt/callbacks.py index 4d2a451da..bd3596854 100644 --- a/skopt/callbacks.py +++ b/skopt/callbacks.py @@ -276,15 +276,12 @@ class ThresholdStopper(EarlyStopper): Stop the optimization when the objective value is lower than the given threshold. """ - def __init__(self, threshold: float) -> bool: + def __init__(self, threshold: float) -> None: super(EarlyStopper, self).__init__() self.threshold = threshold - def _criterion(self, result): - func_vals = np.sort(result.func_vals) - best = func_vals[0] - # True if best < threshold given by the user - return best <= self.threshold + def _criterion(self, result) -> bool: + return np.any([val < self.threshold for val in result.func_vals]) class CheckpointSaver(object): From d6c6e07f0888898be3f5cb879e28ad371e217983 Mon Sep 17 00:00:00 2001 From: Guillaume SIMO <45049080+GuillaumeSimo@users.noreply.github.com> Date: Fri, 12 Feb 2021 09:56:07 +0100 Subject: [PATCH 243/265] from < to <= (ThresholdStopper) --- skopt/callbacks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skopt/callbacks.py b/skopt/callbacks.py index bd3596854..11e6c0bea 100644 --- a/skopt/callbacks.py +++ b/skopt/callbacks.py @@ -281,7 +281,7 @@ def __init__(self, threshold: float) -> None: self.threshold = threshold def _criterion(self, result) -> bool: - return np.any([val < self.threshold for val in result.func_vals]) + return np.any([val <= self.threshold for val in result.func_vals]) class CheckpointSaver(object): From 273db8f3d223c4d471761e6d9ad1bf183b1ee6e1 Mon Sep 17 00:00:00 2001 From: Gian-Carlo Pascutto Date: Sat, 3 Apr 2021 11:44:41 +0200 Subject: [PATCH 244/265] Check whether prior values are valid. Fixes issue #1017. 
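For illustration, a minimal sketch of the behavior added here (hypothetical
snippet, not part of the patch; the error message is the one constructed in
the diff below):

    from skopt.space import Real

    Real(1e-4, 1e-1, prior="log-uniform")      # valid priors pass as before
    try:
        Real(1e-4, 1e-1, prior="log_uniform")  # misspelled prior
    except ValueError as err:
        # prints: prior should be 'uniform' or 'log-uniform' got log_uniform
        print(err)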
---
 skopt/space/space.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/skopt/space/space.py b/skopt/space/space.py
index cf90fb867..05701e51a 100644
--- a/skopt/space/space.py
+++ b/skopt/space/space.py
@@ -254,6 +254,9 @@ def __init__(self, low, high, prior="uniform", base=10, transform=None,
         if high <= low:
             raise ValueError("the lower bound {} has to be less than the"
                              " upper bound {}".format(low, high))
+        if prior not in ["uniform", "log-uniform"]:
+            raise ValueError("prior should be 'uniform' or 'log-uniform'"
+                             " got {}".format(prior))
         self.low = low
         self.high = high
         self.prior = prior
@@ -436,6 +439,9 @@ def __init__(self, low, high, prior="uniform", base=10, transform=None,
         if high <= low:
             raise ValueError("the lower bound {} has to be less than the"
                              " upper bound {}".format(low, high))
+        if prior not in ["uniform", "log-uniform"]:
+            raise ValueError("prior should be 'uniform' or 'log-uniform'"
+                             " got {}".format(prior))
         self.low = low
         self.high = high
         self.prior = prior

From 863575f26a1462fa5b7ef6f67af9feb935861e53 Mon Sep 17 00:00:00 2001
From: Matthias
Date: Tue, 20 Apr 2021 16:59:13 +0200
Subject: [PATCH 245/265] Fix numpy deprecation errors

---
 skopt/learning/gaussian_process/kernels.py | 2 +-
 skopt/space/space.py                       | 8 ++++----
 skopt/space/transformers.py                | 4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/skopt/learning/gaussian_process/kernels.py b/skopt/learning/gaussian_process/kernels.py
index fa6490b4c..708db26a3 100644
--- a/skopt/learning/gaussian_process/kernels.py
+++ b/skopt/learning/gaussian_process/kernels.py
@@ -378,7 +378,7 @@ def __call__(self, X, Y=None, eval_gradient=False):
 
         if np.iterable(length_scale):
             if len(length_scale) > 1:
-                length_scale = np.asarray(length_scale, dtype=np.float)
+                length_scale = np.asarray(length_scale, dtype=float)
             else:
                 length_scale = float(length_scale[0])
         else:
diff --git a/skopt/space/space.py b/skopt/space/space.py
index 05701e51a..9136ce67f 100644
--- a/skopt/space/space.py
+++ b/skopt/space/space.py
@@ -244,13 +244,13 @@ class Real(Dimension):
     name : str or None
         Name associated with the dimension, e.g., "learning rate".
 
-    dtype : str or dtype, default=np.float
+    dtype : str or dtype, default=np.float64
         float type which will be used in inverse_transform,
         can be float.
""" def __init__(self, low, high, prior="uniform", base=10, transform=None, - name=None, dtype=np.float): + name=None, dtype=float): if high <= low: raise ValueError("the lower bound {} has to be less than the" " upper bound {}".format(low, high)) @@ -273,8 +273,8 @@ def __init__(self, low, high, prior="uniform", base=10, transform=None, "or 'float64'" " got {}".format(self.dtype)) elif isinstance(self.dtype, type) and self.dtype\ - not in [float, np.float, np.float16, np.float32, np.float64]: - raise ValueError("dtype must be float, np.float" + not in [float, np.float16, np.float32, np.float64]: + raise ValueError("dtype must be float, np.float64" " got {}".format(self.dtype)) if transform is None: diff --git a/skopt/space/transformers.py b/skopt/space/transformers.py index a8e2944da..688929529 100644 --- a/skopt/space/transformers.py +++ b/skopt/space/transformers.py @@ -89,10 +89,10 @@ def __init__(self, base): self._base = base def transform(self, X): - return np.log10(np.asarray(X, dtype=np.float)) / np.log10(self._base) + return np.log10(np.asarray(X, dtype=float)) / np.log10(self._base) def inverse_transform(self, Xt): - return self._base ** np.asarray(Xt, dtype=np.float) + return self._base ** np.asarray(Xt, dtype=float) class CategoricalEncoder(Transformer): From 2a49b6a2a2e3044d004395b7802bf9587c17c481 Mon Sep 17 00:00:00 2001 From: Matthias Date: Tue, 20 Apr 2021 17:04:00 +0200 Subject: [PATCH 246/265] Fix invalid escape sequence warning --- skopt/learning/gaussian_process/kernels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skopt/learning/gaussian_process/kernels.py b/skopt/learning/gaussian_process/kernels.py index 708db26a3..c0d55a211 100644 --- a/skopt/learning/gaussian_process/kernels.py +++ b/skopt/learning/gaussian_process/kernels.py @@ -316,7 +316,7 @@ def gradient_x(self, x, X_train): class HammingKernel(sk_StationaryKernelMixin, sk_NormalizedKernelMixin, Kernel): - """ + r""" The HammingKernel is used to handle categorical inputs. ``K(x_1, x_2) = exp(\sum_{j=1}^{d} -ls_j * (I(x_1j != x_2j)))`` From da2f4c5d9c9da1e59b1489689315d1a005c8b8d2 Mon Sep 17 00:00:00 2001 From: Matthias Date: Wed, 21 Apr 2021 09:46:27 +0200 Subject: [PATCH 247/265] use np.issubtype for space validation --- skopt/space/space.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/skopt/space/space.py b/skopt/space/space.py index 9136ce67f..bb561e95d 100644 --- a/skopt/space/space.py +++ b/skopt/space/space.py @@ -244,7 +244,7 @@ class Real(Dimension): name : str or None Name associated with the dimension, e.g., "learning rate". - dtype : str or dtype, default=np.float64 + dtype : str or dtype, default=float float type which will be used in inverse_transform, can be float. 
@@ -272,8 +272,8 @@ def __init__(self, low, high, prior="uniform", base=10, transform=None, raise ValueError("dtype must be 'float', 'float16', 'float32'" "or 'float64'" " got {}".format(self.dtype)) - elif isinstance(self.dtype, type) and self.dtype\ - not in [float, np.float16, np.float32, np.float64]: + elif isinstance(self.dtype, type) and \ + np.issubdtype(self.dtype, np.floating): raise ValueError("dtype must be float, np.float64" " got {}".format(self.dtype)) From 84023d9f366d7b2a3476d4da525ccf8c3ca1d38a Mon Sep 17 00:00:00 2001 From: Matthias Date: Wed, 21 Apr 2021 11:07:02 +0200 Subject: [PATCH 248/265] issubdtype requires type to be passed in --- skopt/space/space.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skopt/space/space.py b/skopt/space/space.py index bb561e95d..bc79be0fb 100644 --- a/skopt/space/space.py +++ b/skopt/space/space.py @@ -273,7 +273,7 @@ def __init__(self, low, high, prior="uniform", base=10, transform=None, "or 'float64'" " got {}".format(self.dtype)) elif isinstance(self.dtype, type) and \ - np.issubdtype(self.dtype, np.floating): + np.issubdtype(type(self.dtype), np.floating): raise ValueError("dtype must be float, np.float64" " got {}".format(self.dtype)) From 8c29916e25b3a80cf2727c48114320b3ab4e003b Mon Sep 17 00:00:00 2001 From: Matthias Date: Wed, 21 Apr 2021 16:24:29 +0200 Subject: [PATCH 249/265] Update valueError Exception --- skopt/space/space.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skopt/space/space.py b/skopt/space/space.py index bc79be0fb..4335ef9b5 100644 --- a/skopt/space/space.py +++ b/skopt/space/space.py @@ -273,8 +273,8 @@ def __init__(self, low, high, prior="uniform", base=10, transform=None, "or 'float64'" " got {}".format(self.dtype)) elif isinstance(self.dtype, type) and \ - np.issubdtype(type(self.dtype), np.floating): - raise ValueError("dtype must be float, np.float64" + not np.issubdtype(self.dtype, np.floating): + raise ValueError("dtype must be a np.floating subtype" " got {}".format(self.dtype)) if transform is None: From c95a3e590d2a73450ada2e5f5ab5eac1bbff0d99 Mon Sep 17 00:00:00 2001 From: kernc Date: Wed, 21 Apr 2021 16:29:27 +0200 Subject: [PATCH 250/265] Add semicolon --- skopt/space/space.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skopt/space/space.py b/skopt/space/space.py index 4335ef9b5..329b0c18c 100644 --- a/skopt/space/space.py +++ b/skopt/space/space.py @@ -274,7 +274,7 @@ def __init__(self, low, high, prior="uniform", base=10, transform=None, " got {}".format(self.dtype)) elif isinstance(self.dtype, type) and \ not np.issubdtype(self.dtype, np.floating): - raise ValueError("dtype must be a np.floating subtype" + raise ValueError("dtype must be a np.floating subtype;" " got {}".format(self.dtype)) if transform is None: From 9461bfecc12c8cab7ae18a55e44e1d258b3c09c9 Mon Sep 17 00:00:00 2001 From: kernc Date: Tue, 4 May 2021 21:52:45 +0200 Subject: [PATCH 251/265] Remove BayesSearchCV(iid=) parameter deprecated in sklearn 0.24 (#988) * Remove BayesSearchCV(iid=) parameter deprecated in sklearn Fixes https://github.com/scikit-optimize/scikit-optimize/issues/978 * Remove now unused weights= parameter * Update skopt/searchcv.py Co-authored-by: Tim Head * Fix BayesSearchCV repr/pprint; Fix changed sklearn.model_selection._validation._fit_and_score * Update searchcv.py Further improvements for kernc previous commits. iid totally removed to prevent troubles with sklearn utils prettyprinting. prettyprinting looks for params when print to repl. 
Sklearn 0.24 has changed the return value after CV, so I changed the
dict destructuring (a little bit dirty)

* Update searchcv.py

* Revert unrelated changes

* PEP8 format; add comment

* Revert reverting "unrelated changes"

This is required to pass tests/test_searchcv.py with scikit-learn 0.24+.

* Migrate BayesSearchCV to sklearn BaseSearchCV._run_search() API

Fixes https://github.com/scikit-optimize/scikit-optimize/issues/718

* add todo item

* Add WhatsNew entry

Co-authored-by: Tim Head
Co-authored-by: bole1
---
 doc/whats_new/v0.9.rst |   7 ++
 skopt/searchcv.py      | 259 ++++++-----------------------------------
 2 files changed, 44 insertions(+), 222 deletions(-)

diff --git a/doc/whats_new/v0.9.rst b/doc/whats_new/v0.9.rst
index 42bec8647..2085fe0b3 100644
--- a/doc/whats_new/v0.9.rst
+++ b/doc/whats_new/v0.9.rst
@@ -7,3 +7,10 @@ Version 0.9.0
 =============
 
 **In Development**
+
+:mod:`skopt.searchcv`
+---------------------
+- |Fix| Fix :obj:`skopt.searchcv.BayesSearchCV` for scikit-learn >= 0.24.
+  :pr:`988`
+- |API| Deprecate :class:`skopt.searchcv.BayesSearchCV` parameter `iid=`.
+  :pr:`988`
diff --git a/skopt/searchcv.py b/skopt/searchcv.py
index 342952c22..a37bc79ad 100644
--- a/skopt/searchcv.py
+++ b/skopt/searchcv.py
@@ -1,21 +1,17 @@
+import warnings
+
 try:
     from collections.abc import Sized
 except ImportError:
     from collections import Sized
 
-from collections import defaultdict
-from functools import partial
 
 import numpy as np
 from scipy.stats import rankdata
 
-import sklearn
-from sklearn.base import is_classifier, clone
-from joblib import Parallel, delayed
 from sklearn.model_selection._search import BaseSearchCV
 from sklearn.utils import check_random_state
-from sklearn.utils.fixes import MaskedArray
-from sklearn.utils.validation import indexable, check_is_fitted
+from sklearn.utils.validation import check_is_fitted
 try:
     from sklearn.metrics import check_scoring
 except ImportError:
@@ -115,11 +111,6 @@ class BayesSearchCV(BaseSearchCV):
             - A string, giving an expression as a function of n_jobs,
               as in '2*n_jobs'
 
-    iid : boolean, default=True
-        If True, the data is assumed to be identically distributed across
-        the folds, and the loss minimized is the total loss per sample,
-        and not the mean loss across the folds.
-
     cv : int, cross-validation generator or an iterable, optional
         Determines the cross-validation splitting strategy.
         Possible inputs for cv are:
@@ -289,7 +280,7 @@ class BayesSearchCV(BaseSearchCV):
 
     def __init__(self, estimator, search_spaces, optimizer_kwargs=None,
                  n_iter=50, scoring=None, fit_params=None, n_jobs=1,
-                 n_points=1, iid=True, refit=True, cv=None, verbose=0,
+                 n_points=1, iid='deprecated', refit=True, cv=None, verbose=0,
                  pre_dispatch='2*n_jobs', random_state=None,
                  error_score='raise', return_train_score=False):
 
@@ -305,9 +296,14 @@ def __init__(self, estimator, search_spaces, optimizer_kwargs=None,
         # in the constructor and be passed in ``fit``.
self.fit_params = fit_params + if iid != "deprecated": + warnings.warn("The `iid` parameter has been deprecated " + "and will be ignored.") + self.iid = iid # For sklearn repr pprint + super(BayesSearchCV, self).__init__( estimator=estimator, scoring=scoring, - n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose, + n_jobs=n_jobs, refit=refit, cv=cv, verbose=verbose, pre_dispatch=pre_dispatch, error_score=error_score, return_train_score=return_train_score) @@ -364,170 +360,11 @@ def _check_search_space(self, search_space): "Search space should be provided as a dict or list of dict," "got %s" % search_space) - # copied for compatibility with 0.19 sklearn from 0.18 BaseSearchCV - @property - def best_score_(self): - check_is_fitted(self, 'cv_results_') - return self.cv_results_['mean_test_score'][self.best_index_] - - # copied for compatibility with 0.19 sklearn from 0.18 BaseSearchCV - @property - def best_params_(self): - check_is_fitted(self, 'cv_results_') - return self.cv_results_['params'][self.best_index_] - @property def optimizer_results_(self): check_is_fitted(self, '_optim_results') return self._optim_results - # copied for compatibility with 0.19 sklearn from 0.18 BaseSearchCV - def _fit(self, X, y, groups, parameter_iterable): - """ - Actual fitting, performing the search over parameters. - Taken from https://github.com/scikit-learn/scikit-learn/blob/0.18.X - .../sklearn/model_selection/_search.py - """ - estimator = self.estimator - cv = sklearn.model_selection._validation.check_cv( - self.cv, y, classifier=is_classifier(estimator)) - self.scorer_ = check_scoring( - self.estimator, scoring=self.scoring) - - X, y, groups = indexable(X, y, groups) - n_splits = cv.get_n_splits(X, y, groups) - if self.verbose > 0 and isinstance(parameter_iterable, Sized): - n_candidates = len(parameter_iterable) - print("Fitting {0} folds for each of {1} candidates, totalling" - " {2} fits".format(n_splits, n_candidates, - n_candidates * n_splits)) - - base_estimator = clone(self.estimator) - pre_dispatch = self.pre_dispatch - - cv_iter = list(cv.split(X, y, groups)) - out = Parallel( - n_jobs=self.n_jobs, verbose=self.verbose, - pre_dispatch=pre_dispatch - )(delayed(sklearn.model_selection._validation._fit_and_score)( - clone(base_estimator), - X, y, self.scorer_, - train, test, self.verbose, parameters, - fit_params=self.fit_params, - return_train_score=self.return_train_score, - return_n_test_samples=True, - return_times=True, return_parameters=True, - error_score=self.error_score - ) - for parameters in parameter_iterable - for train, test in cv_iter) - - # if one choose to see train score, "out" will contain train score info - if self.return_train_score: - (train_scores, test_scores, test_sample_counts, - fit_time, score_time, parameters) = zip(*out) - else: - (test_scores, test_sample_counts, - fit_time, score_time, parameters) = zip(*out) - - candidate_params = parameters[::n_splits] - n_candidates = len(candidate_params) - - results = dict() - - def _store(key_name, array, weights=None, splits=False, rank=False): - """A small helper to store the scores/times to the cv_results_""" - array = np.array(array, dtype=np.float64).reshape(n_candidates, - n_splits) - if splits: - for split_i in range(n_splits): - results["split%d_%s" - % (split_i, key_name)] = array[:, split_i] - - array_means = np.average(array, axis=1, weights=weights) - results['mean_%s' % key_name] = array_means - # Weighted std is not directly available in numpy - array_stds = np.sqrt(np.average((array - - array_means[:, 
np.newaxis]) ** 2, - axis=1, weights=weights)) - results['std_%s' % key_name] = array_stds - - if rank: - results["rank_%s" % key_name] = np.asarray( - rankdata(-array_means, method='min'), dtype=np.int32) - - # Computed the (weighted) mean and std for test scores alone - # NOTE test_sample counts (weights) remain the same for all candidates - test_sample_counts = np.array(test_sample_counts[:n_splits], - dtype=np.int) - - _store('test_score', test_scores, splits=True, rank=True, - weights=test_sample_counts if self.iid else None) - if self.return_train_score: - _store('train_score', train_scores, splits=True) - _store('fit_time', fit_time) - _store('score_time', score_time) - - best_index = np.flatnonzero(results["rank_test_score"] == 1)[0] - best_parameters = candidate_params[best_index] - - # Use one MaskedArray and mask all the places where the param is not - # applicable for that candidate. Use defaultdict as each candidate may - # not contain all the params - param_results = defaultdict(partial(np.ma.array, - np.empty(n_candidates,), - mask=True, - dtype=object)) - for cand_i, params in enumerate(candidate_params): - for name, value in params.items(): - # An all masked empty array gets created for the key - # `"param_%s" % name` at the first occurence of `name`. - # Setting the value at an index also unmasks that index - param_results["param_%s" % name][cand_i] = value - - results.update(param_results) - - # Store a list of param dicts at the key 'params' - results['params'] = candidate_params - - self.cv_results_ = results - self.best_index_ = best_index - self.n_splits_ = n_splits - - if self.refit: - # fit the best estimator using the entire dataset - # clone first to work around broken estimators - best_estimator = clone(base_estimator).set_params( - **best_parameters) - if y is not None: - best_estimator.fit(X, y, **self.fit_params) - else: - best_estimator.fit(X, **self.fit_params) - self.best_estimator_ = best_estimator - return self - - def _fit_best_model(self, X, y): - """Fit the estimator copy with best parameters found to the - provided data. - - Parameters - ---------- - X : array-like, shape = [n_samples, n_features] - Input data, where n_samples is the number of samples and - n_features is the number of features. - - y : array-like, shape = [n_samples] or [n_samples, n_output], - Target relative to X for classification or regression. - - Returns - ------- - self - """ - self.best_estimator_ = clone(self.estimator) - self.best_estimator_.set_params(**self.best_params_) - self.best_estimator_.fit(X, y, **(self.fit_params or {})) - return self - def _make_optimizer(self, params_space): """Instantiate skopt Optimizer class. @@ -556,10 +393,9 @@ def _make_optimizer(self, params_space): return optimizer - def _step(self, X, y, search_space, optimizer, groups=None, n_points=1): + def _step(self, search_space, optimizer, evaluate_candidates, n_points=1): """Generate n_jobs parameters and evaluate them in parallel. 
""" - # get parameter values to evaluate params = optimizer.ask(n_points=n_points) @@ -569,33 +405,10 @@ def _step(self, X, y, search_space, optimizer, groups=None, n_points=1): # make lists into dictionaries params_dict = [point_asdict(search_space, p) for p in params] - # HACK: self.cv_results_ is reset at every call to _fit, keep current - all_cv_results = self.cv_results_ - - # HACK: this adds compatibility with different versions of sklearn - refit = self.refit - self.refit = False - self._fit(X, y, groups, params_dict) - self.refit = refit - - # merge existing and new cv_results_ - for k in self.cv_results_: - all_cv_results[k].extend(self.cv_results_[k]) - - all_cv_results["rank_test_score"] = list(np.asarray( - rankdata(-np.array(all_cv_results['mean_test_score']), - method='min'), dtype=np.int32)) - if self.return_train_score: - all_cv_results["rank_train_score"] = list(np.asarray( - rankdata(-np.array(all_cv_results['mean_train_score']), - method='min'), dtype=np.int32)) - self.cv_results_ = all_cv_results - self.best_index_ = np.argmax(self.cv_results_['mean_test_score']) - - # feed the point and objective back into optimizer - local_results = self.cv_results_['mean_test_score'][-len(params):] - - # optimizer minimizes objective, hence provide negative score + all_results = evaluate_candidates(params_dict) + # Feed the point and objective value back into optimizer + # Optimizer minimizes objective, hence provide negative score + local_results = all_results["mean_test_score"][-len(params):] return optimizer.tell(params, [-score for score in local_results]) @property @@ -621,10 +434,8 @@ def total_iterations(self): return total_iter - def _run_search(self, x): - pass - - def fit(self, X, y=None, groups=None, callback=None): + # TODO: Accept callbacks via the constructor? + def fit(self, X, y=None, *, groups=None, callback=None, **fit_params): """Run fit on the estimator with randomly drawn parameters. Parameters @@ -645,18 +456,31 @@ def fit(self, X, y=None, groups=None, callback=None): combination tested. If list of callables, then each callable in the list is called. 
""" + self._callbacks = check_callback(callback) + if self.optimizer_kwargs is None: + self.optimizer_kwargs_ = {} + else: + self.optimizer_kwargs_ = dict(self.optimizer_kwargs) + + super().fit(X=X, y=y, groups=groups, **fit_params) + + # BaseSearchCV never ranked train scores, + # but apparently we used to ship this (back-compat) + if self.return_train_score: + self.cv_results_["rank_train_score"] = \ + rankdata(-np.array(self.cv_results_["mean_train_score"]), + method='min').astype(int) + return self + + def _run_search(self, evaluate_candidates): # check if space is a single dict, convert to list if so search_spaces = self.search_spaces if isinstance(search_spaces, dict): search_spaces = [search_spaces] - callbacks = check_callback(callback) + callbacks = self._callbacks - if self.optimizer_kwargs is None: - self.optimizer_kwargs_ = {} - else: - self.optimizer_kwargs_ = dict(self.optimizer_kwargs) random_state = check_random_state(self.random_state) self.optimizer_kwargs_['random_state'] = random_state @@ -668,9 +492,6 @@ def fit(self, X, y=None, groups=None, callback=None): optimizers.append(self._make_optimizer(search_space)) self.optimizers_ = optimizers # will save the states of the optimizers - self.cv_results_ = defaultdict(list) - self.best_index_ = None - self.multimetric_ = False self._optim_results = [] n_points = self.n_points @@ -689,17 +510,11 @@ def fit(self, X, y=None, groups=None, callback=None): n_points_adjusted = min(n_iter, n_points) optim_result = self._step( - X, y, search_space, optimizer, - groups=groups, n_points=n_points_adjusted + search_space, optimizer, + evaluate_candidates, n_points=n_points_adjusted ) n_iter -= n_points if eval_callbacks(callbacks, optim_result): break self._optim_results.append(optim_result) - - # Refit the best model on the the whole dataset - if self.refit: - self._fit_best_model(X, y) - - return self From c53998816481d150ccc745ffd07d022fdb1fd25d Mon Sep 17 00:00:00 2001 From: Kernc Date: Sat, 8 May 2021 04:07:12 +0200 Subject: [PATCH 252/265] Re-add `BayesSearchCV.best_score_` needed by some examples Removed in 9461bfecc12c8cab7ae18a55e44e1d258b3c09c9 "Remove BayesSearchCV(iid=) parameter deprecated in sklearn 0.24 (#988)" --- skopt/searchcv.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/skopt/searchcv.py b/skopt/searchcv.py index a37bc79ad..fbf406f64 100644 --- a/skopt/searchcv.py +++ b/skopt/searchcv.py @@ -360,6 +360,12 @@ def _check_search_space(self, search_space): "Search space should be provided as a dict or list of dict," "got %s" % search_space) + # copied for compatibility with 0.19 sklearn from 0.18 BaseSearchCV + @property + def best_score_(self): + check_is_fitted(self, 'cv_results_') + return self.cv_results_['mean_test_score'][self.best_index_] + @property def optimizer_results_(self): check_is_fitted(self, '_optim_results') From fb3d17a0a63a80acd14294957571912539fc8978 Mon Sep 17 00:00:00 2001 From: Kernc Date: Tue, 11 May 2021 01:35:53 +0200 Subject: [PATCH 253/265] Revert "Re-add `BayesSearchCV.best_score_` needed by some examples" This reverts commit c53998816481d150ccc745ffd07d022fdb1fd25d. Instead, the fix is not to rely on `BayesSearchCV.best_score_` in callbacks (i.e. until `BaseSearchCV.fit` computes it). 
--- examples/sklearn-gridsearchcv-replacement.py | 2 +- skopt/searchcv.py | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/examples/sklearn-gridsearchcv-replacement.py b/examples/sklearn-gridsearchcv-replacement.py index 727777a86..2b0728100 100644 --- a/examples/sklearn-gridsearchcv-replacement.py +++ b/examples/sklearn-gridsearchcv-replacement.py @@ -174,7 +174,7 @@ # callback handler def on_step(optim_result): - score = searchcv.best_score_ + score = -optim_result['fun'] print("best score: %s" % score) if score >= 0.98: print('Interrupting!') diff --git a/skopt/searchcv.py b/skopt/searchcv.py index fbf406f64..a37bc79ad 100644 --- a/skopt/searchcv.py +++ b/skopt/searchcv.py @@ -360,12 +360,6 @@ def _check_search_space(self, search_space): "Search space should be provided as a dict or list of dict," "got %s" % search_space) - # copied for compatibility with 0.19 sklearn from 0.18 BaseSearchCV - @property - def best_score_(self): - check_is_fitted(self, 'cv_results_') - return self.cv_results_['mean_test_score'][self.best_index_] - @property def optimizer_results_(self): check_is_fitted(self, '_optim_results') From 6d166805828376c39349eb886700e87d0100cfe6 Mon Sep 17 00:00:00 2001 From: freddyaboulton Date: Tue, 4 May 2021 17:17:37 -0400 Subject: [PATCH 254/265] Use deepcopy to prevent reference cycles --- skopt/optimizer/base.py | 4 ++-- skopt/optimizer/optimizer.py | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/skopt/optimizer/base.py b/skopt/optimizer/base.py index ac26bd66e..acb350e85 100644 --- a/skopt/optimizer/base.py +++ b/skopt/optimizer/base.py @@ -209,8 +209,8 @@ def base_minimize(func, dimensions, base_estimator, For more details related to the OptimizeResult object, refer http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html """ - specs = {"args": copy.copy(inspect.currentframe().f_locals), - "function": inspect.currentframe().f_code.co_name} + specs = {"args": copy.deepcopy(locals()), + "function": copy.deepcopy(inspect.currentframe().f_code.co_name)} acq_optimizer_kwargs = { "n_points": n_points, "n_restarts_optimizer": n_restarts_optimizer, diff --git a/skopt/optimizer/optimizer.py b/skopt/optimizer/optimizer.py index 8aa32169d..047b52175 100644 --- a/skopt/optimizer/optimizer.py +++ b/skopt/optimizer/optimizer.py @@ -3,7 +3,6 @@ from math import log from numbers import Number import copy -import inspect import numpy as np from scipy.optimize import fmin_l_bfgs_b @@ -171,7 +170,7 @@ def __init__(self, dimensions, base_estimator="gp", model_queue_size=None, acq_func_kwargs=None, acq_optimizer_kwargs=None): - self.specs = {"args": copy.copy(inspect.currentframe().f_locals), + self.specs = {"args": copy.deepcopy(locals()), "function": "Optimizer"} self.rng = check_random_state(random_state) From a9186508205e3a57b109338ce58dc9d00bfa4376 Mon Sep 17 00:00:00 2001 From: freddyaboulton Date: Wed, 5 May 2021 16:33:29 -0400 Subject: [PATCH 255/265] Not using copy in optimizer for specs --- skopt/optimizer/base.py | 2 +- skopt/optimizer/optimizer.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/skopt/optimizer/base.py b/skopt/optimizer/base.py index acb350e85..5d5b5856f 100644 --- a/skopt/optimizer/base.py +++ b/skopt/optimizer/base.py @@ -210,7 +210,7 @@ def base_minimize(func, dimensions, base_estimator, http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html """ specs = {"args": copy.deepcopy(locals()), - "function": 
copy.deepcopy(inspect.currentframe().f_code.co_name)} + "function": "base_minimize"} acq_optimizer_kwargs = { "n_points": n_points, "n_restarts_optimizer": n_restarts_optimizer, diff --git a/skopt/optimizer/optimizer.py b/skopt/optimizer/optimizer.py index 047b52175..01e732d35 100644 --- a/skopt/optimizer/optimizer.py +++ b/skopt/optimizer/optimizer.py @@ -2,7 +2,6 @@ import warnings from math import log from numbers import Number -import copy import numpy as np from scipy.optimize import fmin_l_bfgs_b @@ -170,7 +169,9 @@ def __init__(self, dimensions, base_estimator="gp", model_queue_size=None, acq_func_kwargs=None, acq_optimizer_kwargs=None): - self.specs = {"args": copy.deepcopy(locals()), + args = locals() + del args['self'] + self.specs = {"args": args, "function": "Optimizer"} self.rng = check_random_state(random_state) From 438a6daf919c9694fd9b22e2618a30c8e88acfd7 Mon Sep 17 00:00:00 2001 From: freddyaboulton Date: Fri, 7 May 2021 11:52:05 -0400 Subject: [PATCH 256/265] No deep copy locals in base_minimize --- skopt/optimizer/base.py | 4 +--- skopt/optimizer/optimizer.py | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/skopt/optimizer/base.py b/skopt/optimizer/base.py index 5d5b5856f..0ded90bb4 100644 --- a/skopt/optimizer/base.py +++ b/skopt/optimizer/base.py @@ -4,8 +4,6 @@ It is sufficient that one re-implements the base estimator. """ -import copy -import inspect import warnings import numbers try: @@ -209,7 +207,7 @@ def base_minimize(func, dimensions, base_estimator, For more details related to the OptimizeResult object, refer http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html """ - specs = {"args": copy.deepcopy(locals()), + specs = {"args": locals(), "function": "base_minimize"} acq_optimizer_kwargs = { diff --git a/skopt/optimizer/optimizer.py b/skopt/optimizer/optimizer.py index 01e732d35..240ffbfc1 100644 --- a/skopt/optimizer/optimizer.py +++ b/skopt/optimizer/optimizer.py @@ -169,7 +169,7 @@ def __init__(self, dimensions, base_estimator="gp", model_queue_size=None, acq_func_kwargs=None, acq_optimizer_kwargs=None): - args = locals() + args = locals().copy() del args['self'] self.specs = {"args": args, "function": "Optimizer"} From 747b43fb03126de5cb26e2090d6ecd93943a2dd7 Mon Sep 17 00:00:00 2001 From: Sam Date: Tue, 22 Jun 2021 05:20:42 -0700 Subject: [PATCH 257/265] DOC: Fix 'less' vs 'fewer' (#1040) --- examples/hyperparameter-optimization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/hyperparameter-optimization.py b/examples/hyperparameter-optimization.py index 0de3bb1fb..46a75aec9 100644 --- a/examples/hyperparameter-optimization.py +++ b/examples/hyperparameter-optimization.py @@ -22,7 +22,7 @@ In this notebook, we illustrate how to couple :class:`gp_minimize` with sklearn's estimators to tune hyper-parameters using sequential model-based optimisation, -hopefully resulting in equivalent or better solutions, but within less +hopefully resulting in equivalent or better solutions, but within fewer evaluations. 
 Note: scikit-optimize provides a dedicated interface for estimator tuning via

From 8123056f258d7e48b05f3123cfdb6809ddc8e3dc Mon Sep 17 00:00:00 2001
From: QuentinSoubeyran <45202794+QuentinSoubeyran@users.noreply.github.com>
Date: Mon, 4 Oct 2021 21:42:29 +0200
Subject: [PATCH 258/265] MNT: Bump numpy/scipy on CircleCI (#1069)

* update numpy in circleci deps

* update scipy in circleci
---
 .circleci/config.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 3b307cc50..af8df0ed6 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -10,9 +10,9 @@ jobs:
       - MINICONDA_PATH: ~/miniconda
       - CONDA_ENV_NAME: testenv
       - PYTHON_VERSION: 3.6
-      - NUMPY_VERSION: 1.13.3
+      - NUMPY_VERSION: 1.14.6
       - SKLEARN_VERSION: 0.20.4
-      - SCIPY_VERSION: 0.19.1
+      - SCIPY_VERSION: 1.1.0
       - MATPLOTLIB_VERSION: 2.1.1  # on conda, this is the latest for python 3.5
 
       # The following places need to be in sync with regard to Cython version:

From 708bf1645c9767beafd4b198eb2ee5935dffc8ff Mon Sep 17 00:00:00 2001
From: QuentinSoubeyran <45202794+QuentinSoubeyran@users.noreply.github.com>
Date: Tue, 5 Oct 2021 19:40:11 +0200
Subject: [PATCH 259/265] BUG: Fix GaussianProcessRegressor sklearn version check for scikit-learn 1.0 (#1063)

* better scikit-learn option parsing for gpr

* fixed test using old sklearn API

* remove spurious newline

* fix circleci linting

* updated circleci numpy and scipy version

* updated what's new

* removed packaging dependency
---
 doc/whats_new/v0.9.rst                       |  6 ++++++
 examples/sklearn-gridsearchcv-replacement.py |  2 +-
 skopt/learning/gaussian_process/gpr.py       |  4 ++--
 skopt/searchcv.py                            |  2 +-
 skopt/tests/test_plots.py                    |  6 ++++--
 skopt/tests/test_searchcv.py                 | 14 +++++++-------
 6 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/doc/whats_new/v0.9.rst b/doc/whats_new/v0.9.rst
index 2085fe0b3..6e8e599ed 100644
--- a/doc/whats_new/v0.9.rst
+++ b/doc/whats_new/v0.9.rst
@@ -14,3 +14,9 @@ Version 0.9.0
   :pr:`988`
 - |API| Deprecate :class:`skopt.searchcv.BayesSearchCV` parameter `iid=`.
:pr:`988` + +:mod:`skopt.learning.gaussian_process.gpr` +------------------------------------------ +- |Fix| Fix :class:`skopt.learning.gaussian_process.gpr.GaussianProcessRegressor` + for scikit-learn >= 1.0 + :pr:`1063` \ No newline at end of file diff --git a/examples/sklearn-gridsearchcv-replacement.py b/examples/sklearn-gridsearchcv-replacement.py index 2b0728100..2d52b8775 100644 --- a/examples/sklearn-gridsearchcv-replacement.py +++ b/examples/sklearn-gridsearchcv-replacement.py @@ -198,7 +198,7 @@ def on_step(optim_result): from sklearn.datasets import load_iris from sklearn.svm import SVC -X, y = load_iris(True) +X, y = load_iris(return_X_y=True) searchcv = BayesSearchCV( SVC(), diff --git a/skopt/learning/gaussian_process/gpr.py b/skopt/learning/gaussian_process/gpr.py index 6b13bb686..34902d78c 100644 --- a/skopt/learning/gaussian_process/gpr.py +++ b/skopt/learning/gaussian_process/gpr.py @@ -224,10 +224,10 @@ def fit(self, X, y): self.K_inv_ = L_inv.dot(L_inv.T) # Fix deprecation warning #462 - if int(sklearn.__version__[2:4]) >= 23: + if sklearn.__version__ >= "0.23": self.y_train_std_ = self._y_train_std self.y_train_mean_ = self._y_train_mean - elif int(sklearn.__version__[2:4]) >= 19: + elif sklearn.__version__ >= "0.19": self.y_train_mean_ = self._y_train_mean self.y_train_std_ = 1 else: diff --git a/skopt/searchcv.py b/skopt/searchcv.py index a37bc79ad..391b14cfb 100644 --- a/skopt/searchcv.py +++ b/skopt/searchcv.py @@ -157,7 +157,7 @@ class BayesSearchCV(BaseSearchCV): >>> from sklearn.svm import SVC >>> from sklearn.model_selection import train_test_split >>> - >>> X, y = load_iris(True) + >>> X, y = load_iris(return_X_y=True) >>> X_train, X_test, y_train, y_test = train_test_split(X, y, ... train_size=0.75, ... random_state=0) diff --git a/skopt/tests/test_plots.py b/skopt/tests/test_plots.py index 3fb57d94c..6a909ac85 100644 --- a/skopt/tests/test_plots.py +++ b/skopt/tests/test_plots.py @@ -39,7 +39,8 @@ def objective(params): **{dim.name: val for dim, val in zip(SPACE, params) if dim.name != 'dummy'}) - return -np.mean(cross_val_score(clf, *load_breast_cancer(True))) + X, y = load_breast_cancer(return_X_y=True) + return -np.mean(cross_val_score(clf, X, y)) res = gp_minimize(objective, SPACE, n_calls=10, random_state=3) @@ -112,7 +113,8 @@ def objective(params): **{dim.name: val for dim, val in zip(SPACE, params) if dim.name != 'dummy'}) - return -np.mean(cross_val_score(clf, *load_breast_cancer(True))) + X, y = load_breast_cancer(return_X_y=True) + return -np.mean(cross_val_score(clf, X, y)) res = gp_minimize(objective, SPACE, n_calls=10, random_state=3) plots.plot_convergence(res) diff --git a/skopt/tests/test_searchcv.py b/skopt/tests/test_searchcv.py index 67c15eee9..8dcb6f44b 100644 --- a/skopt/tests/test_searchcv.py +++ b/skopt/tests/test_searchcv.py @@ -93,7 +93,7 @@ def test_searchcv_runs(surrogate, n_jobs, n_points, cv=None): """ - X, y = load_iris(True) + X, y = load_iris(return_X_y=True) X_train, X_test, y_train, y_test = train_test_split( X, y, train_size=0.75, random_state=0 ) @@ -140,7 +140,7 @@ def test_searchcv_runs_multiple_subspaces(): multiple subspaces are given. """ - X, y = load_iris(True) + X, y = load_iris(return_X_y=True) X_train, X_test, y_train, y_test = train_test_split( X, y, train_size=0.75, random_state=0 ) @@ -193,7 +193,7 @@ def test_searchcv_sklearn_compatibility(): such as clone, set_params, get_params. 
""" - X, y = load_iris(True) + X, y = load_iris(return_X_y=True) X_train, X_test, y_train, y_test = train_test_split( X, y, train_size=0.75, random_state=0 ) @@ -257,7 +257,7 @@ def test_searchcv_reproducibility(): random state. """ - X, y = load_iris(True) + X, y = load_iris(return_X_y=True) X_train, X_test, y_train, y_test = train_test_split( X, y, train_size=0.75, random_state=0 ) @@ -305,7 +305,7 @@ def test_searchcv_rank(): random state. """ - X, y = load_iris(True) + X, y = load_iris(return_X_y=True) X_train, X_test, y_train, y_test = train_test_split( X, y, train_size=0.75, random_state=0 ) @@ -341,7 +341,7 @@ def test_searchcv_refit(): random state. """ - X, y = load_iris(True) + X, y = load_iris(return_X_y=True) X_train, X_test, y_train, y_test = train_test_split( X, y, train_size=0.75, random_state=0 ) @@ -383,7 +383,7 @@ def test_searchcv_callback(): # Test whether callback is used in BayesSearchCV and # whether is can be used to interrupt the search loop - X, y = load_iris(True) + X, y = load_iris(return_X_y=True) opt = BayesSearchCV( DecisionTreeClassifier(), { From fc469d9c0704aa8e82db06fbeb28b024c656ea9f Mon Sep 17 00:00:00 2001 From: Kernc Date: Wed, 6 Oct 2021 01:16:22 +0200 Subject: [PATCH 260/265] CLN: Fix remaining sklearn 1.0 return_X_y= error --- examples/plots/partial-dependence-plot-with-categorical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/plots/partial-dependence-plot-with-categorical.py b/examples/plots/partial-dependence-plot-with-categorical.py index 5e87e38f6..6aa075cf6 100644 --- a/examples/plots/partial-dependence-plot-with-categorical.py +++ b/examples/plots/partial-dependence-plot-with-categorical.py @@ -35,7 +35,7 @@ def objective(params): clf = DecisionTreeClassifier( **{dim.name: val for dim, val in zip(SPACE, params) if dim.name != 'dummy'}) - return -np.mean(cross_val_score(clf, *load_breast_cancer(True))) + return -np.mean(cross_val_score(clf, *load_breast_cancer(return_X_y=True))) ############################################################################# # Bayesian optimization From c3abe8b51db782bebbc5ca69e7e4d4ec8289200c Mon Sep 17 00:00:00 2001 From: Kernc Date: Thu, 7 Oct 2021 01:59:59 +0200 Subject: [PATCH 261/265] MNT: Fix CircleCI builds by unpinning + full docs for PRs --- .circleci/config.yml | 13 +------------ build_tools/circle/build_doc.sh | 11 ++++------- 2 files changed, 5 insertions(+), 19 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index af8df0ed6..1a53edff5 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -9,18 +9,7 @@ jobs: - MKL_NUM_THREADS: 2 - MINICONDA_PATH: ~/miniconda - CONDA_ENV_NAME: testenv - - PYTHON_VERSION: 3.6 - - NUMPY_VERSION: 1.14.6 - - SKLEARN_VERSION: 0.20.4 - - SCIPY_VERSION: 1.1.0 - - MATPLOTLIB_VERSION: 2.1.1 - # on conda, this is the latest for python 3.5 - # The following places need to be in sync with regard to Cython version: - # - .circleci config file - # - sklearn/_build_utils/__init__.py - # - advanced installation guide - - CYTHON_VERSION: 0.28.5 - - SCIKIT_IMAGE_VERSION: 0.13 + - PYTHON_VERSION: 3.7 steps: - checkout - run: ./build_tools/circle/checkout_merge_commit.sh diff --git a/build_tools/circle/build_doc.sh b/build_tools/circle/build_doc.sh index e293f346b..ab4937787 100755 --- a/build_tools/circle/build_doc.sh +++ b/build_tools/circle/build_doc.sh @@ -121,9 +121,6 @@ if [[ "$CIRCLE_BRANCH" =~ ^master$|^[0-9]+\.[0-9]+\.X$ && -z "$CI_PULL_REQUEST" then # PDF linked into HTML make_args="dist LATEXMKOPTS=-halt-on-error" 
-elif [[ "$build_type" =~ ^QUICK ]] -then - make_args=html-noplot elif [[ "$build_type" =~ ^'BUILD: detected examples' ]] then # pattern for examples to run is the last line of output @@ -168,10 +165,10 @@ fi # packaging won't be needed once setuptools starts shipping packaging>=17.0 conda create -n $CONDA_ENV_NAME --yes --quiet python="${PYTHON_VERSION:-*}" \ - numpy="${NUMPY_VERSION:-*}" scipy="${SCIPY_VERSION:-*}" \ - cython="${CYTHON_VERSION:-*}" pytest coverage \ - matplotlib="${MATPLOTLIB_VERSION:-*}" sphinx=2.1.2 pillow \ - scikit-image="${SCIKIT_IMAGE_VERSION:-*}" pandas="${PANDAS_VERSION:-*}" \ + numpy scipy \ + cython pytest coverage \ + matplotlib sphinx pillow \ + scikit-image pandas \ joblib memory_profiler packaging export MPLBACKEND="agg" From acdafb9dd9d18c1da9b54195ae24e2010b415579 Mon Sep 17 00:00:00 2001 From: Kernc Date: Thu, 7 Oct 2021 16:40:39 +0200 Subject: [PATCH 262/265] MNT: Fix "LaTeX Error: File 'tgtermes.sty' not found." on CircleCI --- build_tools/circle/build_doc.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build_tools/circle/build_doc.sh b/build_tools/circle/build_doc.sh index ab4937787..f5ac1a3ee 100755 --- a/build_tools/circle/build_doc.sh +++ b/build_tools/circle/build_doc.sh @@ -139,7 +139,7 @@ sudo -E apt-get -yq remove texlive-binaries --purge sudo -E apt-get -yq --no-install-suggests --no-install-recommends \ install dvipng texlive-latex-base texlive-latex-extra \ texlive-latex-recommended texlive-fonts-recommended \ - latexmk gsfonts ccache + latexmk tex-gyre gsfonts ccache # deactivate circleci virtualenv and setup a miniconda env instead if [[ `type -t deactivate` ]]; then From 180b0848ebdd6279702529a13a14d7971d8818b9 Mon Sep 17 00:00:00 2001 From: kernc Date: Mon, 11 Oct 2021 22:06:05 +0200 Subject: [PATCH 263/265] DOC: Update whats_new for 0.9.0 (#1075) * DOC: Update whats_new for 0.9.0 * MNT: Bump __version__ to v0.9rc1 * MNT: Redact .travis.yml for minimal credits use --- .travis.yml | 29 ++++------------------------- doc/whats_new/v0.9.rst | 26 ++++++++++++++++---------- skopt/__init__.py | 2 +- 3 files changed, 21 insertions(+), 36 deletions(-) diff --git a/.travis.yml b/.travis.yml index d94a52c75..d79ac469d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,32 +16,11 @@ env: matrix: include: - # Linux environment to test scikit-learn against numpy and scipy master - # installed from their CI wheels in a virtualenv with the Python - # interpreter provided by travis. 
-    - name: "Python 3.6 - scikit 0.20.4"
-      python: "3.6"
-      env: DISTRIB="conda" PYTHON_VERSION="3.6"
-           NUMPY_VERSION="1.13.3" SCIPY_VERSION="0.19.1" PYAML_VERSION="16.9.0"
-           SCIKIT_LEARN_VERSION="0.20.4" MATPLOTLIB_VERSION="2.0.0" COVERAGE="false"
-           JOBLIB_VERSION="0.11"
-    - name: "Python 3.6 - scikit 0.21.3"
-      python: "3.6"
-      env: DISTRIB="conda" PYTHON_VERSION="3.6"
-           NUMPY_VERSION="1.14.0" SCIPY_VERSION="1.0.0" PYAML_VERSION="16.12.0"
-           SCIKIT_LEARN_VERSION="0.21.3" MATPLOTLIB_VERSION="2.0.0" COVERAGE="false"
-           JOBLIB_VERSION="0.11"
-    - name: "Python 3.7 - scikit 0.22.1"
+    - name: "Python 3.7 - scikit 0.24.2"
       python: "3.7"
-      env: DISTRIB="conda" PYTHON_VERSION="3.7"
-           NUMPY_VERSION="1.16.0" SCIPY_VERSION="1.2.0" PYAML_VERSION="17.8.0"
-           SCIKIT_LEARN_VERSION="0.22.1" MATPLOTLIB_VERSION="*" COVERAGE="true"
-           JOBLIB_VERSION="0.13"
-    - name: "Python 3.8 - scikit 0.23.2"
-      python: "3.8"
-      env: DISTRIB="conda" PYTHON_VERSION="3.8.1" COVERAGE="false"
+      env: DISTRIB="conda" PYTHON_VERSION="3.7" COVERAGE="false"
            NUMPY_VERSION="1.19.1" SCIPY_VERSION="1.5.2" PYAML_VERSION="20.4.0"
-           SCIKIT_LEARN_VERSION="0.23.2" JOBLIB_VERSION="0.16.0"
+           SCIKIT_LEARN_VERSION="0.24.2" JOBLIB_VERSION="0.16.0"
     - name: "Python 3.7 - sdist check"
       python: "3.7"
       env: DISTRIB="conda" PYTHON_VERSION="3.7"
@@ -64,7 +43,7 @@ deploy:
   on:
     tags: true
     repo: scikit-optimize/scikit-optimize
-    condition: "$PYTHON_VERSION = 3.6"
+    condition: "$PYTHON_VERSION = 3.7"
   skip_cleanup: true
   skip_existing: true
   password:
diff --git a/doc/whats_new/v0.9.rst b/doc/whats_new/v0.9.rst
index 6e8e599ed..979e09c67 100644
--- a/doc/whats_new/v0.9.rst
+++ b/doc/whats_new/v0.9.rst
@@ -6,17 +6,23 @@
 Version 0.9.0
 =============
 
-**In Development**
+**October 2021**
 
-:mod:`skopt.searchcv`
----------------------
+- |Fix| :obj:`skopt.learning.gaussian_process.gpr.GaussianProcessRegressor`
+  for sklearn >= 0.23. :pr:`943`
+- Change `skip=` parameter in :obj:`skopt.sampler.sobol.Sobol`
+  initial point generator. :pr:`955`
+- |Feature| :obj:`skopt.callbacks.HollowIterationsStopper` callback. :pr:`917`
+- |Feature| :obj:`skopt.callbacks.ThresholdStopper` callback. :pr:`1000`
 - |Fix| Fix :obj:`skopt.searchcv.BayesSearchCV` for scikit-learn >= 0.24.
   :pr:`988`
 - |API| Deprecate :class:`skopt.searchcv.BayesSearchCV` parameter `iid=`.
-  :pr:`988`
-
-:mod:`skopt.learning.gaussian_process.gpr`
-------------------------------------------
-- |Fix| Fix :class:`skopt.learning.gaussian_process.gpr.GaussianProcessRegressor`
-  for scikit-learn >= 1.0
-  :pr:`1063`
\ No newline at end of file
+  :pr:`988`
+- |Fix| NumPy deprecation errors. :pr:`1023`
+- |Fix| issue with :class:`skopt.optimizer.optimizer.Optimizer` not being
+  garbage-collectable. :pr:`1029`
+- |Fix| version check in
+  :class:`skopt.learning.gaussian_process.gpr.GaussianProcessRegressor`
+  for scikit-learn >= 1.0. :pr:`1063`
+- Minor documentation improvements.
+- Various small bugs and fixes.
diff --git a/skopt/__init__.py b/skopt/__init__.py
index 914c8f2f3..7019a4881 100644
--- a/skopt/__init__.py
+++ b/skopt/__init__.py
@@ -29,7 +29,7 @@
 # Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer.
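# Pre-release markers such as the '0.9rc1' set below follow PEP 440:
# 'X.YrcN' denotes release candidate N of version X.Y.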
# 'X.Y.dev0' is the canonical version of 'X.Y.dev' # -__version__ = "0.9.dev0" +__version__ = "0.9rc1" if __SKOPT_SETUP__: import sys From 48dddc0e45803596391acd55d0438bb3e144073e Mon Sep 17 00:00:00 2001 From: Kernc Date: Tue, 12 Oct 2021 13:08:54 +0200 Subject: [PATCH 264/265] MNT: Update __version__ to 0.9.0 --- skopt/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skopt/__init__.py b/skopt/__init__.py index 7019a4881..80a423a9f 100644 --- a/skopt/__init__.py +++ b/skopt/__init__.py @@ -29,7 +29,7 @@ # Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer. # 'X.Y.dev0' is the canonical version of 'X.Y.dev' # -__version__ = "0.9rc1" +__version__ = "0.9.0" if __SKOPT_SETUP__: import sys From a2369ddbc332d16d8ff173b12404b03fea472492 Mon Sep 17 00:00:00 2001 From: Kernc Date: Tue, 12 Oct 2021 14:51:32 +0200 Subject: [PATCH 265/265] MNT: Print to stdout every so often to avoid CircleCI timeout --- build_tools/circle/build_doc.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/build_tools/circle/build_doc.sh b/build_tools/circle/build_doc.sh index f5ac1a3ee..e3e57484f 100755 --- a/build_tools/circle/build_doc.sh +++ b/build_tools/circle/build_doc.sh @@ -188,9 +188,16 @@ then python build_tools/circle/list_versions.py > doc/versions.rst fi +# Install this noise maker on CircleCI to prevent +# "Too long with no output (exceeded 10m0s): context deadline exceeded" +while true; do sleep $((60 * 5)); echo -e '\nStill working ...\n'; done & +noise_maker=$! + # The pipefail is requested to propagate exit code set -o pipefail && cd doc && make $make_args 2>&1 | tee ~/log.txt +kill $noise_maker + # Insert the version warning for deployment find _build/html/stable -name "*.html" | xargs sed -i '/<\/body>/ i \ \ '
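The keep-alive loop added by the final patch is a reusable pattern for any CI
step that can stay quiet longer than the platform's no-output timeout. A
minimal standalone sketch of the same idea follows; `run_quiet_build` is a
hypothetical placeholder for the actual long-running command (in build_doc.sh
it is the `make $make_args` invocation):

    #!/usr/bin/env bash
    # Emit a heartbeat every 5 minutes so CircleCI's no-output watchdog
    # (10 minutes by default) does not abort the job.
    while true; do sleep $((60 * 5)); echo -e '\nStill working ...\n'; done &
    noise_maker=$!

    # run_quiet_build is a hypothetical stand-in for the real, quiet work.
    run_quiet_build

    # Stop the heartbeat once the real work finishes; a forgotten
    # background loop would keep printing and could mask a hung build.
    kill $noise_maker

Killing the background loop explicitly matters: the `kill $noise_maker` in the
patch ensures the heartbeat stops as soon as the doc build completes.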