diff --git a/.circleci/artifact_path b/.circleci/artifact_path new file mode 100644 index 000000000..a03587f29 --- /dev/null +++ b/.circleci/artifact_path @@ -0,0 +1 @@ +0/doc/_changed.html \ No newline at end of file diff --git a/.circleci/config.yml b/.circleci/config.yml index c3890036b..1a53edff5 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,53 +1,141 @@ version: 2 +# # Copied from https://github.com/scikit-learn/scikit-learn/blob/master/.circleci/config.yml jobs: - build: + doc-min-dependencies: docker: - - image: continuumio/miniconda3 + - image: circleci/python:3.7.3-stretch + environment: + - OMP_NUM_THREADS: 2 + - MKL_NUM_THREADS: 2 + - MINICONDA_PATH: ~/miniconda + - CONDA_ENV_NAME: testenv + - PYTHON_VERSION: 3.7 steps: - checkout + - run: ./build_tools/circle/checkout_merge_commit.sh - restore_cache: - key: deps-{{ .Branch }}-{{ checksum "setup.py" }}-{{ checksum "build_tools/circle/install.sh" }} - - run: - name: Install dependencies - command: | - bash build_tools/circle/install.sh - no_output_timeout: 1024s + keys: + - doc-min-deps-ccache-{{ .Branch }} + - doc-min-deps-ccache + - run: ./build_tools/circle/build_doc.sh - save_cache: - key: deps-{{ .Branch }}-{{ checksum "setup.py" }}-{{ checksum "build_tools/circle/install.sh" }} + key: doc-min-deps-ccache-{{ .Branch }}-{{ .BuildNum }} paths: - - /opt/conda/pkgs + - ~/.ccache + - ~/.cache/pip + - store_artifacts: + path: doc/_build/html/stable + destination: doc + - store_artifacts: + path: ~/log.txt + destination: log.txt + + doc: + docker: + - image: circleci/python:3.7.3-stretch + environment: + - OMP_NUM_THREADS: 2 + - MKL_NUM_THREADS: 2 + - MINICONDA_PATH: ~/miniconda + - CONDA_ENV_NAME: testenv + - PYTHON_VERSION: 3 + steps: + - checkout + - run: ./build_tools/circle/checkout_merge_commit.sh + - restore_cache: + keys: + - doc-ccache-{{ .Branch }} + - doc-ccache + - run: ./build_tools/circle/build_doc.sh + - save_cache: + key: doc-ccache-{{ .Branch }}-{{ .BuildNum }} + paths: + - ~/.ccache - ~/.cache/pip - - run: - name: Run build - command: | - bash build_tools/circle/execute.sh - if grep -q "Traceback (most recent call last):" nb_to_md.txt; then false; else true; fi - no_output_timeout: 3600s - store_artifacts: - path: ~/doc + path: doc/_build/html/stable destination: doc + - store_artifacts: + path: ~/log.txt + destination: log.txt + # Persists generated documentation so that it can be attached and deployed + # in the 'deploy' step. - persist_to_workspace: - root: ~/doc + root: doc/_build/html paths: . + lint: + docker: + - image: circleci/python:3.6 + steps: + - checkout + - run: ./build_tools/circle/checkout_merge_commit.sh + - run: + name: dependencies + command: sudo pip install flake8 + - run: + name: linting + command: ./build_tools/circle/linting.sh + + pypy3: + docker: + - image: pypy:3.6-7.2.0 + steps: + - restore_cache: + keys: + - pypy3-ccache-{{ .Branch }} + - pypy3-ccache + - checkout + - run: ./build_tools/circle/build_test_pypy.sh + - save_cache: + key: pypy3-ccache-{{ .Branch }}-{{ .BuildNum }} + paths: + - ~/.ccache + - ~/.cache/pip + deploy: docker: - - image: circleci/python:3.6.7 + - image: circleci/python:3.6 steps: - checkout + - run: ./build_tools/circle/checkout_merge_commit.sh + # Attach documentation generated in the 'doc' step so that it can be + # deployed. 
- attach_workspace: - at: ~/doc + at: doc/_build/html + - run: ls -ltrh doc/_build/html/stable - deploy: - command: bash build_tools/circle/deploy.sh + command: | + if [[ "${CIRCLE_BRANCH}" =~ ^master$|^[0-9]+\.[0-9]+\.X$ ]]; then + bash build_tools/circle/push_doc.sh doc/_build/html/stable + fi workflows: version: 2 build-doc-and-deploy: jobs: - - build + - lint + - doc: + requires: + - lint + - doc-min-dependencies: + requires: + - lint + - pypy3: + filters: + branches: + only: + - 0.20.X - deploy: requires: - - build + - doc + pypy: + triggers: + - schedule: + cron: "0 0 * * *" filters: branches: - only: master + only: + - master + jobs: + - pypy3 \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index 8f690c549..d79ac469d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,61 +9,31 @@ cache: env: global: # Directory where tests are run from - - TEST_DIR=/tmp/sklearn + - TEST_DIR=/tmp/skopt - OMP_NUM_THREADS=4 - OPENBLAS_NUM_THREADS=4 matrix: include: - # Linux environment to test scikit-learn against numpy and scipy master - # installed from their CI wheels in a virtualenv with the Python - # interpreter provided by travis. - - name: "Python 3.5 - scikit 0.19.2" + - name: "Python 3.7 - scikit 0.24.2" python: "3.7" - env: DISTRIB="conda" PYTHON_VERSION="3.5" - NUMPY_VERSION="*" SCIPY_VERSION="*" PYAML_VERSION="*" - SCIKIT_LEARN_VERSION="0.19.2" MATPLOTLIB_VERSION="*" COVERAGE="false" - JOBLIB_VERSION="0.11" - - name: "Python 3.6 - scikit 0.20.4" - python: "3.7" - env: DISTRIB="conda" PYTHON_VERSION="3.6" - NUMPY_VERSION="*" SCIPY_VERSION="*" PYAML_VERSION="*" - SCIKIT_LEARN_VERSION="0.20.4" MATPLOTLIB_VERSION="*" COVERAGE="false" - JOBLIB_VERSION="0.12" - - name: "Python 3.7 - scikit 0.21.3" - python: "3.7" - env: DISTRIB="conda" PYTHON_VERSION="3.7" - NUMPY_VERSION="*" SCIPY_VERSION="*" PYAML_VERSION="*" - SCIKIT_LEARN_VERSION="0.21.3" MATPLOTLIB_VERSION="*" COVERAGE="true" - JOBLIB_VERSION="*" - - name: "Python 3.7 - scikit 0.22.1" + env: DISTRIB="conda" PYTHON_VERSION="3.7" COVERAGE="false" + NUMPY_VERSION="1.19.1" SCIPY_VERSION="1.5.2" PYAML_VERSION="20.4.0" + SCIKIT_LEARN_VERSION="0.24.2" JOBLIB_VERSION="0.16.0" + - name: "Python 3.7 - sdist check" python: "3.7" env: DISTRIB="conda" PYTHON_VERSION="3.7" - NUMPY_VERSION="*" SCIPY_VERSION="*" PYAML_VERSION="*" - SCIKIT_LEARN_VERSION="0.22.1" MATPLOTLIB_VERSION="*" COVERAGE="true" - JOBLIB_VERSION="*" - - name: "Python 3.8 latest package versions" - python: "3.7" - env: DISTRIB="conda" PYTHON_VERSION="3.8" COVERAGE="false" NUMPY_VERSION="*" SCIPY_VERSION="*" PYAML_VERSION="*" - SCIKIT_LEARN_VERSION="*" JOBLIB_VERSION="*" + SCIKIT_LEARN_VERSION="*" MATPLOTLIB_VERSION="*" COVERAGE="false" + JOBLIB_VERSION="*" SDIST="true" install: source build_tools/travis/install.sh - script: - - if [ ${COVERAGE} == "true" ]; - then pytest --cov=skopt --durations=10; else - pytest --durations=10; - fi - -after_success: - - if [ ${COVERAGE} == "true" ]; then - pip install codecov; - codecov; - fi - + - bash build_tools/travis/test_script.sh + - bash build_tools/travis/test_docs.sh +after_success: source build_tools/travis/after_success.sh deploy: provider: pypi user: __token__ @@ -73,7 +43,7 @@ deploy: on: tags: true repo: scikit-optimize/scikit-optimize - condition: "$PYTHON_VERSION = 3.6" + condition: "$PYTHON_VERSION = 3.7" skip_cleanup: true skip_existing: true password: diff --git a/.zenodo.json b/.zenodo.json index 7c95c0c3e..71ed3e4b0 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -4,7 +4,8 @@ "creators": [ { "affiliation": "Wild 
Tree Tech", - "name": "Head, Tim" + "name": "Head, Tim", + "orcid": "0000-0003-0931-3698" }, { "affiliation": "Google Brain", @@ -16,103 +17,12 @@ }, { "affiliation": "ULi\u00e8ge", - "name": "Louppe, Gilles" + "name": "Louppe, Gilles", + "orcid": "0000-0002-2082-3106" }, { "affiliation": "Saarland University", "name": "Shcherbatyi, Iaroslav" - }, - { - "name": "fcharras" - }, - { - "name": "Z\u00e9 Vin\u00edcius" - }, - { - "name": "cmmalone" - }, - { - "name": "Christopher Schr\u00f6der" - }, - { - "name": "nel215" - }, - { - "affiliation": "@yldio", - "name": "Nuno Campos" - }, - { - "name": "Todd Young" - }, - { - "affiliation": "Politecnico di Milano", - "name": "Stefano Cereda" - }, - { - "name": "Thomas Fan" - }, - { - "name": "rene-rex" - }, - { - "affiliation": "Columbia University", - "name": "Kejia (KJ) Shi" - }, - { - "affiliation": "Biomedical Informatics Department, Emory School of Medicine", - "name": "Justus Schwabedal" - }, - { - "name": "carlosdanielcsantos" - }, - { - "affiliation": "Hvass Laboratories", - "name": "Hvass-Labs" - }, - { - "affiliation": "Technical University of Munich", - "name": "Mikhail Pak" - }, - { - "name": "SoManyUsernamesTaken" - }, - { - "affiliation": "UC Berkeley", - "name": "Fred Callaway" - }, - { - "name": "Lo\u00efc Est\u00e8ve" - }, - { - "affiliation": "ENS de Cachan - Paris Saclay University", - "name": "Lilian Besson" - }, - { - "name": "Mehdi Cherti" - }, - { - "affiliation": "Paderborn University", - "name": "Karlson Pfannschmidt" - }, - { - "affiliation": "Toptal", - "name": "Fabian Linzberger" - }, - { - "affiliation": "@point8", - "name": "Christophe Cauet" - }, - { - "affiliation": "10clouds", - "name": "Anna Gut" - }, - { - "affiliation": "Columbia University Data Science Institute", - "name": "Andreas Mueller" - }, - { - "affiliation": "DFKI", - "name": "Alexander Fabisch" } ], "keywords": [ @@ -124,6 +34,13 @@ "hyperparameter", "bayesian-optimization" ], - "license": "bsd-license", + "license": "bsd-3-clause", + "related_identifiers": [ + { + "identifier": "https://scikit-optimize.github.io", + "relation": "documents", + "scheme": "url" + } + ], "upload_type": "software" } \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 0caf27d91..f9d8b4e29 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,178 +1,5 @@ # Release history -## Version 0.7.1 - -### New features - -* Sphinx documentation -* notebooks are replaced by sphinx-gallery -* New StringEncoder, can be used in Categoricals -* Remove string conversion in Identity -* dtype can be set in Integer and Real - -### Bug fixes - -* Fix categorical space (issue #821) -* int can be set as dtype to fix issue #790 - -### Maintenance - -* Old pdoc scripts are removed and replaced by sphinx - -## Version 0.7 - -### New features - -* Models queue has now a customizable size (model_queue_size). -* Add log-uniform prior to Integer space -* Support for plotting categorical dimensions - -### Bug fixes - -* Allow BayesSearchCV to work with sklearn 0.21 -* Reduce the amount of deprecation warnings in unit tests - -### Maintenance - -* joblib instead of sklearn.externals.joblib -* Improve travis CI unit tests (Different sklearn version are checked) -* Added `versioneer` support, to keep things simple and to fix pypi deploy - -## Version 0.6 - -Highly composite six. - -### New features - -* `plot_regret` function for plotting the cumulative regret; -The purpose of such plot is to access how much an optimizer -is effective at picking good points. 
-* `CheckpointSaver` that can be used to save a -checkpoint after each iteration with skopt.dump -* `Space.from_yaml()` - to allow for external file to define Space parameters - -### Bug fixes - -* Fixed numpy broadcasting issues in gaussian_ei, gaussian_pi -* Fixed build with newest scikit-learn -* Use native python types inside BayesSearchCV -* Include fit_params in BayesSearchCV refit - -### Maintenance - -* Added `versioneer` support, to reduce changes with new version of the `skopt` - -## Version 0.5.2 - -### Bug fixes - -* Separated `n_points` from `n_jobs` in `BayesSearchCV`. -* Dimensions now support boolean np.arrays. - -### Maintenance - -* `matplotlib` is now an optional requirement (install with `pip install 'scikit-optimize[plots]'`) - -## Version 0.5 - -High five! - -### New features - -* Single element dimension definition, which can be used to -fix the value of a dimension during optimization. -* `total_iterations` property of `BayesSearchCV` that -counts total iterations needed to explore all subspaces. -* Add iteration event handler for `BayesSearchCV`, useful -for early stopping inside `BayesSearchCV` search loop. -* added `utils.use_named_args` decorator to help with unpacking named dimensions -when calling an objective function. - -### Bug fixes - -* Removed redundant estimator fitting inside `BayesSearchCV`. -* Fixed the log10 transform for Real dimensions that would lead to values being - out of bounds. - -## Version 0.4 - -Go forth! - -### New features - -* Support early stopping of optimization loop. -* Benchmarking scripts to evaluate performance of different surrogate models. -* Support for parallel evaluations of the objective function via several - constant liar stategies. -* BayesSearchCV as a drop in replacement for scikit-learn's GridSearchCV. -* New acquisition functions "EIps" and "PIps" that takes into account - function compute time. - -### Bug fixes - -* Fixed inference of dimensions of type Real. - -### API changes - -* Change interface of GradientBoostingQuantileRegressor's predict method to - match return type of other regressors -* Dimensions of type Real are now inclusive of upper bound. - - -## Version 0.3 - -Third time's a charm. - -### New features - -* Accuracy improvements of the optimization of the acquisition function -by pre-selecting good candidates as starting points when -using `acq_optimizer='lbfgs'`. -* Support a ask-and-tell interface. Check out the `Optimizer` class if you need -fine grained control over the iterations. -* Parallelize L-BFGS minimization runs over the acquisition function. -* Implement weighted hamming distance kernel for problems with only categorical dimensions. -* New acquisition function `gp_hedge` that probabilistically chooses one of `EI`, `PI` -or `LCB` at every iteration depending upon the cumulative gain. - -### Bug fixes -* Warnings are now raised if a point is chosen as the candidate optimum multiple -times. -* Infinite gradients that were raised in the kernel gradient computation are -now fixed. -* Integer dimensions are now normalized to [0, 1] internally in `gp_minimize`. - -### API Changes. -* The default `acq_optimizer` function has changed from `"auto"` to `"lbfgs"` -in `gp_minimize`. - - -## Version 0.2 - -### New features - -* Speed improvements when using `gp_minimize` with `acq_optimizer='lbfgs'` and -`acq_optimizer='auto'` when all the search-space dimensions are Real. -* Persistence of minimization results using `skopt.dump` and `skopt.load`. 
-* Support for using arbitrary estimators that implement a -`return_std` argument in their `predict` method by means of `base_minimize` from `skopt.optimizer.` -* Support for tuning noise in `gp_minimize` using the `noise` argument. -* `TimerCallback` in `skopt.callbacks` to log the time between iterations of -the minimization loop. - - -## Version 0.1 - -First light! - -### New features - -* Bayesian optimization via `gp_minimize`. -* Tree-based sequential model-based optimization via `forest_minimize` and `gbrt_minimize`, with support for multi-threading. -* Support of LCB, EI and PI as acquisition functions. -* Plotting functions for inspecting convergence, evaluations and the objective function. -* API for specifying and sampling from a parameter space. - +See https://scikit-optimize.github.io/dev/whats_new.html # Contributors diff --git a/LICENSE b/LICENSE new file mode 100644 index 000000000..0dbeb1883 --- /dev/null +++ b/LICENSE @@ -0,0 +1,29 @@ +BSD 3-Clause License + +Copyright (c) 2016-2020 The scikit-optimize developers. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/LICENSE.md b/LICENSE.md deleted file mode 100644 index cf5772813..000000000 --- a/LICENSE.md +++ /dev/null @@ -1,32 +0,0 @@ -New BSD License - -Copyright (c) 2016-2020 The scikit-optimize developers. - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - a. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - b. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - c. Neither the name of the scikit-optimize developers nor the names of - its contributors may be used to endorse or promote products - derived from this software without specific prior written - permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH -DAMAGE. diff --git a/MANIFEST.in b/MANIFEST.in index 95de3d03e..f0036cd4a 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -2,4 +2,6 @@ include *.md include *.rst recursive-include doc * recursive-include examples * -include README.rst \ No newline at end of file +include LICENSE +include README.rst +include pyproject.toml \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..13d6b3799 --- /dev/null +++ b/Makefile @@ -0,0 +1,67 @@ +# simple makefile to simplify repetitive build env management tasks under posix + +# caution: testing won't work on windows, see README + +PYTHON ?= python +CYTHON ?= cython +PYTEST ?= pytest +CTAGS ?= ctags + +# skip doctests on 32bit python +BITS := $(shell python -c 'import struct; print(8 * struct.calcsize("P"))') + +all: clean inplace test + +clean-ctags: + rm -f tags + +clean: clean-ctags + $(PYTHON) setup.py clean + rm -rf dist + # TODO: Remove in when all modules are removed. + $(PYTHON) sklearn/_build_utils/deprecated_modules.py + +in: inplace # just a shortcut +inplace: + $(PYTHON) setup.py build_ext -i + +test-code: in + $(PYTEST) --showlocals -v skopt --durations=20 +test-sphinxext: + $(PYTEST) --showlocals -v doc/sphinxext/ +test-doc: +ifeq ($(BITS),64) + $(PYTEST) $(shell find doc -name '*.rst' | sort) +endif +test-code-parallel: in + $(PYTEST) -n auto --showlocals -v skopt --durations=20 + +test-coverage: + rm -rf coverage .coverage + $(PYTEST) skopt --showlocals -v --cov=sklearn --cov-report=html:coverage +test-coverage-parallel: + rm -rf coverage .coverage .coverage.* + $(PYTEST) skopt -n auto --showlocals -v --cov=sklearn --cov-report=html:coverage + +test: test-code test-sphinxext test-doc + +trailing-spaces: + find skopt -name "*.py" -exec perl -pi -e 's/[ \t]*$$//' {} \; + +ctags: + # make tags for symbol based navigation in emacs and vim + # Install with: sudo apt-get install exuberant-ctags + $(CTAGS) --python-kinds=-i -R skopt + +doc: inplace + $(MAKE) -C doc html + +doc-noplot: inplace + $(MAKE) -C doc html-noplot + +code-analysis: + flake8 sklearn | grep -v __init__ | grep -v external + pylint -E -i y skopt/ -d E1103,E0611,E1101 + +flake8-diff: + ./build_tools/circle/linting.sh diff --git a/README.rst b/README.rst index e6bbe971f..9ba472283 100644 --- a/README.rst +++ b/README.rst @@ -29,8 +29,7 @@ Important links - Static documentation - `Static documentation `__ -- Example notebooks - can be found in the - `examples directory `_. +- Example notebooks - can be found in examples_. 
- Issue tracker - https://github.com/scikit-optimize/scikit-optimize/issues - Releases - https://pypi.python.org/pypi/scikit-optimize @@ -38,8 +37,16 @@ Important links Install ------- -The latest released version of scikit-optimize is v0.7.1, which you can install -with: +scikit-optimize requires + +* Python >= 3.6 +* NumPy (>= 1.13.3) +* SciPy (>= 0.19.1) +* joblib (>= 0.11) +* scikit-learn >= 0.20 +* matplotlib >= 2.0.0 + +You can install the latest release with: :: pip install scikit-optimize @@ -97,9 +104,8 @@ class: Read our `introduction to bayesian -optimization `__ -and the other -`examples `__. +optimization `__ +and the other examples_. Development @@ -107,7 +113,7 @@ Development The library is still experimental and under heavy development. Checkout the `next -milestone `__ +milestone `__ for the plans for the next release or look at some `easy issues `__ to get started contributing. @@ -139,7 +145,7 @@ create a new issue and work through the following checklist: * update the version tag in ``__init__.py`` * update the version tag mentioned in the README * check if the dependencies in ``setup.py`` are valid or need unpinning -* check that the ``CHANGELOG.md`` is up to date +* check that the ``doc/whats_new/v0.X.rst`` is up to date * did the last build of master succeed? * create a `new release `__ * ping `conda-forge `__ @@ -198,3 +204,4 @@ recognition, feel free to add them to the "Made possible by" list. :target: https://gitter.im/scikit-optimize/Lobby .. |Zenodo DOI| image:: https://zenodo.org/badge/54340642.svg :target: https://zenodo.org/badge/latestdoi/54340642 +.. _examples: https://scikit-optimize.github.io/stable/auto_examples/index.html diff --git a/build_tools/circle/build_doc.sh b/build_tools/circle/build_doc.sh new file mode 100755 index 000000000..e3e57484f --- /dev/null +++ b/build_tools/circle/build_doc.sh @@ -0,0 +1,260 @@ +#!/usr/bin/env bash +set -x +set -e +# Copied from https://github.com/scikit-learn/scikit-learn/blob/master/build_tools/circle/build_doc.sh +# The scikit-learn developers. +# License: BSD-style +# +# Decide what kind of documentation build to run, and run it. +# +# If the last commit message has a "[doc skip]" marker, do not build +# the doc. On the contrary if a "[doc build]" marker is found, build the doc +# instead of relying on the subsequent rules. +# +# We always build the documentation for jobs that are not related to a specific +# PR (e.g. a merge to master or a maintenance branch). +# +# If this is a PR, do a full build if there are some files in this PR that are +# under the "doc/" or "examples/" folders, otherwise perform a quick build. +# +# If the inspection of the current commit fails for any reason, the default +# behavior is to quick build the documentation. 
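#
# For illustration only (hypothetical commit messages, not part of the build
# rules themselves):
#   "MNT tweak CI config [doc skip]"        -> get_build_type() below reports SKIP and the job exits before building
#   "DOC rework plotting guide [doc build]" -> a full documentation build is forced, even on a pull request
#   a PR touching only skopt/*.py           -> quick build; no examples are executed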
+ +get_build_type() { + if [ -z "$CIRCLE_SHA1" ] + then + echo SKIP: undefined CIRCLE_SHA1 + return + fi + commit_msg=$(git log --format=%B -n 1 $CIRCLE_SHA1) + if [ -z "$commit_msg" ] + then + echo QUICK BUILD: failed to inspect commit $CIRCLE_SHA1 + return + fi + if [[ "$commit_msg" =~ \[doc\ skip\] ]] + then + echo SKIP: [doc skip] marker found + return + fi + if [[ "$commit_msg" =~ \[doc\ quick\] ]] + then + echo QUICK: [doc quick] marker found + return + fi + if [[ "$commit_msg" =~ \[doc\ build\] ]] + then + echo BUILD: [doc build] marker found + return + fi + if [ -z "$CI_PULL_REQUEST" ] + then + echo BUILD: not a pull request + return + fi + git_range="origin/master...$CIRCLE_SHA1" + git fetch origin master >&2 || (echo QUICK BUILD: failed to get changed filenames for $git_range; return) + filenames=$(git diff --name-only $git_range) + if [ -z "$filenames" ] + then + echo QUICK BUILD: no changed filenames for $git_range + return + fi + changed_examples=$(echo "$filenames" | grep -E "^examples/(.*/)*plot_") + + # The following is used to extract the list of filenames of example python + # files that sphinx-gallery needs to run to generate png files used as + # figures or images in the .rst files from the documentation. + # If the contributor changes a .rst file in a PR we need to run all + # the examples mentioned in that file to get sphinx build the + # documentation without generating spurious warnings related to missing + # png files. + + if [[ -n "$filenames" ]] + then + # get rst files + rst_files="$(echo "$filenames" | grep -E "rst$")" + + # get lines with figure or images + img_fig_lines="$(echo "$rst_files" | xargs grep -shE "(figure|image)::")" + + # get only auto_examples + auto_example_files="$(echo "$img_fig_lines" | grep auto_examples | awk -F "/" '{print $NF}')" + + # remove "sphx_glr_" from path and accept replace _(\d\d\d|thumb).png with .py + scripts_names="$(echo "$auto_example_files" | sed 's/sphx_glr_//' | sed -E 's/_([[:digit:]][[:digit:]][[:digit:]]|thumb).png/.py/')" + + # get unique values + examples_in_rst="$(echo "$scripts_names" | uniq )" + fi + + # executed only if there are examples in the modified rst files + if [[ -n "$examples_in_rst" ]] + then + if [[ -n "$changed_examples" ]] + then + changed_examples="$changed_examples|$examples_in_rst" + else + changed_examples="$examples_in_rst" + fi + fi + + if [[ -n "$changed_examples" ]] + then + echo BUILD: detected examples/ filename modified in $git_range: $changed_examples + pattern=$(echo "$changed_examples" | paste -sd '|') + # pattern for examples to run is the last line of output + echo "$pattern" + return + fi + echo QUICK BUILD: no examples/ filename modified in $git_range: + echo "$filenames" +} + +build_type=$(get_build_type) +if [[ "$build_type" =~ ^SKIP ]] +then + exit 0 +fi + +if [[ "$CIRCLE_BRANCH" =~ ^master$|^[0-9]+\.[0-9]+\.X$ && -z "$CI_PULL_REQUEST" ]] +then + # PDF linked into HTML + make_args="dist LATEXMKOPTS=-halt-on-error" +elif [[ "$build_type" =~ ^'BUILD: detected examples' ]] +then + # pattern for examples to run is the last line of output + pattern=$(echo "$build_type" | tail -n 1) + make_args="html EXAMPLES_PATTERN=$pattern" +else + make_args=html +fi + +make_args="SPHINXOPTS=-T $make_args" # show full traceback on exception + +# Installing required system packages to support the rendering of math +# notation in the HTML documentation +sudo -E apt-get -yq update +sudo -E apt-get -yq remove texlive-binaries --purge +sudo -E apt-get -yq --no-install-suggests --no-install-recommends 
\ + install dvipng texlive-latex-base texlive-latex-extra \ + texlive-latex-recommended texlive-fonts-recommended \ + latexmk tex-gyre gsfonts ccache + +# deactivate circleci virtualenv and setup a miniconda env instead +if [[ `type -t deactivate` ]]; then + deactivate +fi + +# Install dependencies with miniconda +wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \ + -O miniconda.sh +chmod +x miniconda.sh && ./miniconda.sh -b -p $MINICONDA_PATH +export PATH="/usr/lib/ccache:$MINICONDA_PATH/bin:$PATH" + +ccache -M 512M +export CCACHE_COMPRESS=1 + +# Old packages coming from the 'free' conda channel have been removed but we +# are using them for our min-dependencies doc generation. See +# https://www.anaconda.com/why-we-removed-the-free-channel-in-conda-4-7/ for +# more details. +if [[ "$CIRCLE_JOB" == "doc-min-dependencies" ]]; then + conda config --set restore_free_channel true +fi + +# packaging won't be needed once setuptools starts shipping packaging>=17.0 +conda create -n $CONDA_ENV_NAME --yes --quiet python="${PYTHON_VERSION:-*}" \ + numpy scipy \ + cython pytest coverage \ + matplotlib sphinx pillow \ + scikit-image pandas \ + joblib memory_profiler packaging + +export MPLBACKEND="agg" +source activate testenv +pip install sphinx-gallery +pip install numpydoc + +# Build and install scikit-optimize in dev mode +python setup.py build_ext --inplace -j 3 +python setup.py develop + +export OMP_NUM_THREADS=1 + +if [[ "$CIRCLE_BRANCH" =~ ^master$ && -z "$CI_PULL_REQUEST" ]] +then + # List available documentation versions if on master + python build_tools/circle/list_versions.py > doc/versions.rst +fi + +# Install this noise maker on CircleCI to prevent +# "Too long with no output (exceeded 10m0s): context deadline exceeded" +while true; do sleep $((60 * 5)); echo -e '\nStill working ...\n'; done & +noise_maker=$! + +# The pipefail is requested to propagate exit code +set -o pipefail && cd doc && make $make_args 2>&1 | tee ~/log.txt + +kill $noise_maker + +# Insert the version warning for deployment +find _build/html/stable -name "*.html" | xargs sed -i '/<\/body>/ i \ +\ ' + +cd - +set +o pipefail + +affected_doc_paths() { + files=$(git diff --name-only origin/master...$CIRCLE_SHA1) + echo "$files" | grep ^doc/.*\.rst | sed 's/^doc\/\(.*\)\.rst$/\1.html/' + echo "$files" | grep ^examples/.*.py | sed 's/^\(.*\)\.py$/auto_\1.html/' + skopt_files=$(echo "$files" | grep '^skopt/') + if [ -n "$skopt_files" ] + then + grep -hlR -f<(echo "$skopt_files" | sed 's/^/scikit-optimize\/blob\/[a-z0-9]*\//') doc/_build/html/stable/modules/generated | cut -d/ -f5- + fi +} + +affected_doc_warnings() { + files=$(git diff --name-only origin/master...$CIRCLE_SHA1) + # Look for sphinx warnings only in files affected by the PR + if [ -n "$files" ] + then + for af in ${files[@]} + do + warn+=`grep WARNING ~/log.txt | grep $af` + done + fi + echo "$warn" +} + +if [ -n "$CI_PULL_REQUEST" ] +then + echo "The following documentation warnings may have been generated by PR #$CI_PULL_REQUEST:" + warnings=$(affected_doc_warnings) + if [ -z "$warnings" ] + then + warnings="/home/circleci/project/ no warnings" + fi + echo "$warnings" + + echo "The following documentation files may have been changed by PR #$CI_PULL_REQUEST:" + affected=$(affected_doc_paths) + echo "$affected" + ( + echo '
    ' + echo "$affected" | sed 's|.*|
  • & [dev, stable]
  • |' + echo '

General: Home | API Reference | Examples

' + echo 'Sphinx Warnings in affected files
    ' + echo "$warnings" | sed 's/\/home\/circleci\/project\//
  • /g' + echo '
' + ) > 'doc/_build/html/stable/_changed.html' + + if [ "$warnings" != "/home/circleci/project/ no warnings" ] + then + echo "Sphinx generated warnings when building the documentation related to files modified in this PR." + echo "Please check doc/_build/html/stable/_changed.html" + exit 1 + fi +fi diff --git a/build_tools/circle/build_test_pypy.sh b/build_tools/circle/build_test_pypy.sh new file mode 100755 index 000000000..c858d9add --- /dev/null +++ b/build_tools/circle/build_test_pypy.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +set -x +set -e +# Copied from https://github.com/scikit-learn/scikit-learn/blob/master/build_tools/circle/build_test_pypy.sh +# The scikit-learn developers. +# License: BSD-style +apt-get -yq update +apt-get -yq install libatlas-base-dev liblapack-dev gfortran ccache libopenblas-dev + +pip install virtualenv + +if command -v pypy3; then + virtualenv -p $(command -v pypy3) pypy-env +elif command -v pypy; then + virtualenv -p $(command -v pypy) pypy-env +fi + +source pypy-env/bin/activate + +python --version +which python + +pip install -U pip + +# pins versions to install wheel from https://antocuni.github.io/pypy-wheels/manylinux2010 +pip install --extra-index-url https://antocuni.github.io/pypy-wheels/manylinux2010 numpy==1.18.0 scipy==1.3.2 + +# Install Cython directly +pip install https://antocuni.github.io/pypy-wheels/ubuntu/Cython/Cython-0.29.14-py3-none-any.whl +pip install sphinx numpydoc docutils joblib pillow pytest matplotlib + +ccache -M 512M +export CCACHE_COMPRESS=1 +export PATH=/usr/lib/ccache:$PATH +export LOKY_MAX_CPU_COUNT="2" +export OMP_NUM_THREADS="1" + +python setup.py build_ext --inplace -j 3 +pip install --no-build-isolation -e . + +# Check that Python implementation is PyPy +python - << EOL +import platform +from skopt import IS_PYPY +assert IS_PYPY is True, "platform={}!=PyPy".format(platform.python_implementation()) +EOL + +python -m pytest skopt/ +python -m pytest doc/sphinxext/ +python -m pytest $(find doc -name '*.rst' | sort) \ No newline at end of file diff --git a/build_tools/circle/checkout_merge_commit.sh b/build_tools/circle/checkout_merge_commit.sh new file mode 100755 index 000000000..73947081b --- /dev/null +++ b/build_tools/circle/checkout_merge_commit.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# Copied from https://github.com/scikit-learn/scikit-learn/blob/master/build_tools/circle/checkout_merge_commit.sh +# The scikit-learn developers. +# License: BSD-style + +# Add `master` branch to the update list. +# Otherwise CircleCI will give us a cached one. +FETCH_REFS="+master:master" + +# Update PR refs for testing. +if [[ -n "${CIRCLE_PR_NUMBER}" ]] +then + FETCH_REFS="${FETCH_REFS} +refs/pull/${CIRCLE_PR_NUMBER}/head:pr/${CIRCLE_PR_NUMBER}/head" + FETCH_REFS="${FETCH_REFS} +refs/pull/${CIRCLE_PR_NUMBER}/merge:pr/${CIRCLE_PR_NUMBER}/merge" +fi + +# Retrieve the refs. +git fetch -u origin ${FETCH_REFS} + +# Checkout the PR merge ref. +if [[ -n "${CIRCLE_PR_NUMBER}" ]] +then + git checkout -qf "pr/${CIRCLE_PR_NUMBER}/merge" || ( + echo Could not fetch merge commit. >&2 + echo There may be conflicts in merging PR \#${CIRCLE_PR_NUMBER} with master. >&2; + exit 1) +fi + +# Check for merge conflicts. 
+if [[ -n "${CIRCLE_PR_NUMBER}" ]] +then + git branch --merged | grep master > /dev/null + git branch --merged | grep "pr/${CIRCLE_PR_NUMBER}/head" > /dev/null +fi \ No newline at end of file diff --git a/build_tools/circle/deploy.sh b/build_tools/circle/deploy.sh deleted file mode 100644 index 824f7ba23..000000000 --- a/build_tools/circle/deploy.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -# Almost copied verbatim from https://github.com/scikit-learn/scikit-learn/blob/master/build_tools/circle/push_doc.sh -export SKOPT_HOME=$(pwd) - -if [ -z $CIRCLE_PROJECT_USERNAME ]; -then USERNAME="skoptci"; -else USERNAME=$CIRCLE_PROJECT_USERNAME; -fi - -MSG="Pushing the docs for revision for branch: $CIRCLE_BRANCH, commit $CIRCLE_SHA1" - -# Copying to github pages -echo "Copying built files" -git clone -b master "git@github.com:scikit-optimize/scikit-optimize.github.io" deploy -cd deploy -git rm -r space -git rm -r optimizer -git rm -r learning -cd .. -for entry in ${HOME}/doc/skopt/* -do - echo "$entry" -done - -cp -r ${HOME}/doc/skopt/* deploy -# Move into deployment directory -cd deploy - -# Commit changes, allowing empty changes (when unchanged) -echo "Committing and pushing to Github" -echo "$USERNAME" -git config --global user.name $USERNAME -git config --global user.email "skoptci@gmail.com" -git config --global push.default matching -git add -A -git commit --allow-empty -m "$MSG" -git push - -echo "$MSG" diff --git a/build_tools/circle/execute.sh b/build_tools/circle/execute.sh deleted file mode 100644 index 28006266a..000000000 --- a/build_tools/circle/execute.sh +++ /dev/null @@ -1,19 +0,0 @@ -source activate testenv -export SKOPT_HOME=$(pwd) - -python --version -python -c "import numpy; print('numpy %s' % numpy.__version__)" -python -c "import scipy; print('scipy %s' % scipy.__version__)" - - -mkdir -p ${HOME}/doc/skopt - -cd ~ -cd ${SKOPT_HOME}/doc && sphinx-build -M html ${SKOPT_HOME}/doc ${SKOPT_HOME}/doc/_build # -W --keep-going - -for entry in ${SKOPT_HOME}/doc/_build/* -do - echo "$entry" -done - -cp -r ${SKOPT_HOME}/doc/_build/html/* ${HOME}/doc/skopt diff --git a/build_tools/circle/install.sh b/build_tools/circle/install.sh deleted file mode 100644 index f0a67d38c..000000000 --- a/build_tools/circle/install.sh +++ /dev/null @@ -1,12 +0,0 @@ -conda update -n base conda -conda create -n testenv --yes python pip pytest nose -source activate testenv - -python -m pip install -e '.[plots]' -export SKOPT_HOME=$(pwd) - -python -m pip install sphinx sphinx-gallery numpydoc memory_profiler - -# importing matplotlib once builds the font caches. This avoids -# having warnings in our example notebooks -python -c "import matplotlib.pyplot as plt" diff --git a/build_tools/circle/linting.sh b/build_tools/circle/linting.sh new file mode 100755 index 000000000..bdeeaa0cd --- /dev/null +++ b/build_tools/circle/linting.sh @@ -0,0 +1,159 @@ +#!/bin/bash +# Copied from https://github.com/scikit-learn/scikit-learn/blob/master/build_tools/circle/linting.sh +# The scikit-learn developers. +# License: BSD-style +# +# This script is used in CircleCI to check that PRs do not add obvious +# flake8 violations. It relies on two things: +# - find common ancestor between branch and +# scikit-optimize/scikit-optimize remote +# - run flake8 --diff on the diff between the branch and the common +# ancestor +# +# Additional features: +# - the line numbers in Travis match the local branch on the PR +# author machine. 
+# - ./build_tools/circle/flake8_diff.sh can be run locally for quick +# turn-around + +set -e +# pipefail is necessary to propagate exit codes +set -o pipefail + +PROJECT=scikit-optimize/scikit-optimize +PROJECT_URL=https://github.com/$PROJECT.git + +# Find the remote with the project name (upstream in most cases) +REMOTE=$(git remote -v | grep $PROJECT | cut -f1 | head -1 || echo '') + +# Add a temporary remote if needed. For example this is necessary when +# Travis is configured to run in a fork. In this case 'origin' is the +# fork and not the reference repo we want to diff against. +if [[ -z "$REMOTE" ]]; then + TMP_REMOTE=tmp_reference_upstream + REMOTE=$TMP_REMOTE + git remote add $REMOTE $PROJECT_URL +fi + +echo "Remotes:" +echo '--------------------------------------------------------------------------------' +git remote --verbose + +# Travis does the git clone with a limited depth (50 at the time of +# writing). This may not be enough to find the common ancestor with +# $REMOTE/master so we unshallow the git checkout +if [[ -a .git/shallow ]]; then + echo -e '\nTrying to unshallow the repo:' + echo '--------------------------------------------------------------------------------' + git fetch --unshallow +fi + +if [[ "$TRAVIS" == "true" ]]; then + if [[ "$TRAVIS_PULL_REQUEST" == "false" ]] + then + # In main repo, using TRAVIS_COMMIT_RANGE to test the commits + # that were pushed into a branch + if [[ "$PROJECT" == "$TRAVIS_REPO_SLUG" ]]; then + if [[ -z "$TRAVIS_COMMIT_RANGE" ]]; then + echo "New branch, no commit range from Travis so passing this test by convention" + exit 0 + fi + COMMIT_RANGE=$TRAVIS_COMMIT_RANGE + fi + else + # We want to fetch the code as it is in the PR branch and not + # the result of the merge into master. This way line numbers + # reported by Travis will match with the local code. 
+ LOCAL_BRANCH_REF=travis_pr_$TRAVIS_PULL_REQUEST + # In Travis the PR target is always origin + git fetch origin pull/$TRAVIS_PULL_REQUEST/head:refs/$LOCAL_BRANCH_REF + fi +fi + +# If not using the commit range from Travis we need to find the common +# ancestor between $LOCAL_BRANCH_REF and $REMOTE/master +if [[ -z "$COMMIT_RANGE" ]]; then + if [[ -z "$LOCAL_BRANCH_REF" ]]; then + LOCAL_BRANCH_REF=$(git rev-parse --abbrev-ref HEAD) + fi + echo -e "\nLast 2 commits in $LOCAL_BRANCH_REF:" + echo '--------------------------------------------------------------------------------' + git --no-pager log -2 $LOCAL_BRANCH_REF + + REMOTE_MASTER_REF="$REMOTE/master" + # Make sure that $REMOTE_MASTER_REF is a valid reference + echo -e "\nFetching $REMOTE_MASTER_REF" + echo '--------------------------------------------------------------------------------' + git fetch $REMOTE master:refs/remotes/$REMOTE_MASTER_REF + LOCAL_BRANCH_SHORT_HASH=$(git rev-parse --short $LOCAL_BRANCH_REF) + REMOTE_MASTER_SHORT_HASH=$(git rev-parse --short $REMOTE_MASTER_REF) + + COMMIT=$(git merge-base $LOCAL_BRANCH_REF $REMOTE_MASTER_REF) || \ + echo "No common ancestor found for $(git show $LOCAL_BRANCH_REF -q) and $(git show $REMOTE_MASTER_REF -q)" + + if [ -z "$COMMIT" ]; then + exit 1 + fi + + COMMIT_SHORT_HASH=$(git rev-parse --short $COMMIT) + + echo -e "\nCommon ancestor between $LOCAL_BRANCH_REF ($LOCAL_BRANCH_SHORT_HASH)"\ + "and $REMOTE_MASTER_REF ($REMOTE_MASTER_SHORT_HASH) is $COMMIT_SHORT_HASH:" + echo '--------------------------------------------------------------------------------' + git --no-pager show --no-patch $COMMIT_SHORT_HASH + + COMMIT_RANGE="$COMMIT_SHORT_HASH..$LOCAL_BRANCH_SHORT_HASH" + + if [[ -n "$TMP_REMOTE" ]]; then + git remote remove $TMP_REMOTE + fi + +else + echo "Got the commit range from Travis: $COMMIT_RANGE" +fi + +echo -e '\nRunning flake8 on the diff in the range' "$COMMIT_RANGE" \ + "($(git rev-list $COMMIT_RANGE | wc -l) commit(s)):" +echo '--------------------------------------------------------------------------------' + +# We need the following command to exit with 0 hence the echo in case +# there is no match +MODIFIED_FILES="$(git diff --name-only $COMMIT_RANGE | \ + grep -v 'doc/sphinxext' || echo "no_match")" + +check_files() { + files="$1" + shift + options="$*" + if [ -n "$files" ]; then + # Conservative approach: diff without context (--unified=0) so that code + # that was not changed does not create failures + git diff --unified=0 $COMMIT_RANGE -- $files | flake8 --diff --show-source $options + fi +} + +if [[ "$MODIFIED_FILES" == "no_match" ]]; then + echo "No file outside doc/sphinxext has been modified" +else + + check_files "$(echo "$MODIFIED_FILES" | grep -v ^examples)" + check_files "$(echo "$MODIFIED_FILES" | grep ^examples)" \ + --config ./examples/.flake8 +fi +echo -e "No problem detected by flake8\n" + +# For docstrings and warnings of deprecated attributes to be rendered +# properly, the property decorator must come before the deprecated decorator +# (else they are treated as functions) + +# do not error when grep -B1 "@property" finds nothing +set +e +bad_deprecation_property_order=`git grep -A 10 "@property" -- "*.py" | awk '/@property/,/def /' | grep -B1 "@deprecated"` + +if [ ! 
-z "$bad_deprecation_property_order" ] +then + echo "property decorator should come before deprecated decorator" + echo "found the following occurrencies:" + echo $bad_deprecation_property_order + exit 1 +fi \ No newline at end of file diff --git a/build_tools/circle/list_versions.py b/build_tools/circle/list_versions.py new file mode 100644 index 000000000..b63db3cc2 --- /dev/null +++ b/build_tools/circle/list_versions.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python3 +# Copied from https://github.com/scikit-learn/scikit-learn/blob/master/ +# build_tools/circle/list_versions.sh +# The scikit-learn developers. +# License: BSD-style +# List all available versions of the documentation +import json +import re +import sys + +from distutils.version import LooseVersion +from urllib.request import urlopen + + +def json_urlread(url): + try: + return json.loads(urlopen(url).read().decode('utf8')) + except Exception: + print('Error reading', url, file=sys.stderr) + raise + + +def human_readable_data_quantity(quantity, multiple=1024): + # https://stackoverflow.com/questions/1094841/ + # reusable-library-to-get-human-readable-version-of-file-size + if quantity == 0: + quantity = +0 + SUFFIXES = ["B"] + [i + {1000: "B", 1024: "iB"}[multiple] + for i in "KMGTPEZY"] + for suffix in SUFFIXES: + if quantity < multiple or suffix == SUFFIXES[-1]: + if suffix == SUFFIXES[0]: + return "%d %s" % (quantity, suffix) + else: + return "%.1f %s" % (quantity, suffix) + else: + quantity /= multiple + + +def get_pdf_size(version): + api_url = ROOT_URL + '%s/_downloads' % version + for path_details in json_urlread(api_url): + if path_details['name'] == 'scikit-optimize-docs.pdf': + return human_readable_data_quantity(path_details['size'], 1000) + + +print(':orphan:') +print() +heading = 'Available documentation for Scikit-optimize' +print(heading) +print('=' * len(heading)) +print() +print('Web-based documentation is available for versions listed below:') +print() + +ROOT_URL = 'https://api.github.com/repos/scikit-optimize/scikit-optimize.github.io/contents/' # noqa +RAW_FMT = 'https://raw.githubusercontent.com/scikit-optimize/scikit-optimize.github.io/master/%s/index.html' # noqa +VERSION_RE = re.compile(r"scikit-optimize ([\w\.\-]+) documentation") +NAMED_DIRS = ['dev', 'stable'] + +# Gather data for each version directory, including symlinks +dirs = {} +symlinks = {} +root_listing = json_urlread(ROOT_URL) +for path_details in root_listing: + name = path_details['name'] + if not (name[:1].isdigit() or name in NAMED_DIRS): + continue + if path_details['type'] == 'dir': + html = urlopen(RAW_FMT % name).read().decode('utf8') + version_num = VERSION_RE.search(html).group(1) + pdf_size = get_pdf_size(name) + dirs[name] = (version_num, pdf_size) + + if path_details['type'] == 'symlink': + symlinks[name] = json_urlread(path_details['_links']['self'])['target'] + + +# Symlinks should have same data as target +for src, dst in symlinks.items(): + if dst in dirs: + dirs[src] = dirs[dst] + +# Output in order: dev, stable, decreasing other version +seen = set() +for name in (NAMED_DIRS + + sorted((k for k in dirs if k[:1].isdigit()), + key=LooseVersion, reverse=True)): + if name not in dirs: + continue + version_num, pdf_size = dirs[name] + if version_num in seen: + # symlink came first + continue + else: + seen.add(version_num) + name_display = '' if name[:1].isdigit() else ' (%s)' % name + path = 'https://scikit-optimize.github.io/%s/' % name + out = ('* `Scikit-optimize %s%s documentation <%s>`_' + % (version_num, name_display, 
path)) + if pdf_size is not None: + out += (' (`PDF %s <%s/_downloads/scikit-optimize-docs.pdf>`_)' + % (pdf_size, path)) + print(out) diff --git a/build_tools/circle/push_doc.sh b/build_tools/circle/push_doc.sh new file mode 100755 index 000000000..1c8eae252 --- /dev/null +++ b/build_tools/circle/push_doc.sh @@ -0,0 +1,69 @@ +#!/bin/bash +# Copied from https://github.com/scikit-learn/scikit-learn/blob/master/build_tools/circle/push_doc.sh +# The scikit-learn developers. +# License: BSD-style +# +# This script is meant to be called in the "deploy" step defined in +# circle.yml. See https://circleci.com/docs/ for more details. +# The behavior of the script is controlled by environment variable defined +# in the circle.yml in the top level folder of the project. + +set -ex + +if [ -z $CIRCLE_PROJECT_USERNAME ]; +then USERNAME="skoptci"; +else USERNAME=$CIRCLE_PROJECT_USERNAME; +fi + +DOC_REPO="scikit-optimize.github.io" +GENERATED_DOC_DIR=$1 + +if [[ -z "$GENERATED_DOC_DIR" ]]; then + echo "Need to pass directory of the generated doc as argument" + echo "Usage: $0 " + exit 1 +fi + +# Absolute path needed because we use cd further down in this script +GENERATED_DOC_DIR=$(readlink -f $GENERATED_DOC_DIR) + +if [ "$CIRCLE_BRANCH" = "master" ] +then + dir=dev +else + # Strip off .X + dir="${CIRCLE_BRANCH::-2}" +fi + +MSG="Pushing the docs to $dir/ for branch: $CIRCLE_BRANCH, commit $CIRCLE_SHA1" + +cd $HOME +if [ ! -d $DOC_REPO ]; +then git clone --depth 1 --no-checkout "git@github.com:scikit-optimize/"$DOC_REPO".git"; +fi +cd $DOC_REPO + +# check if it's a new branch + +echo $dir > .git/info/sparse-checkout +if ! git show HEAD:$dir >/dev/null +then + # directory does not exist. Need to make it so sparse checkout works + mkdir $dir + touch $dir/index.html + git add $dir +fi +git checkout master +git reset --hard origin/master +if [ -d $dir ] +then + git rm -rf $dir/ && rm -rf $dir/ +fi +cp -R $GENERATED_DOC_DIR $dir +git config user.email "skoptci@gmail.com" +git config user.name $USERNAME +git config push.default matching +git add -f $dir/ +git commit -m "$MSG" $dir +git push +echo $MSG \ No newline at end of file diff --git a/build_tools/travis/after_success.sh b/build_tools/travis/after_success.sh new file mode 100644 index 000000000..494f86b6e --- /dev/null +++ b/build_tools/travis/after_success.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# This script is meant to be called by the "after_success" step defined in +# .travis.yml. See https://docs.travis-ci.com/ for more details. + +# License: 3-clause BSD + +set -e + +if [[ "$COVERAGE" == "true" ]]; then + # Need to run codecov from a git checkout, so we copy .coverage + # from TEST_DIR where pytest has been run + cp $TEST_DIR/.coverage $TRAVIS_BUILD_DIR + + # Ignore codecov failures as the codecov server is not + # very reliable but we don't want travis to report a failure + # in the github UI just because the coverage report failed to + # be published. 
+ codecov --root $TRAVIS_BUILD_DIR || echo "codecov upload failed" +fi \ No newline at end of file diff --git a/build_tools/travis/install.sh b/build_tools/travis/install.sh index 04cd1bf92..f6237c0a1 100644 --- a/build_tools/travis/install.sh +++ b/build_tools/travis/install.sh @@ -64,7 +64,12 @@ if [[ "$COVERAGE" == "true" ]]; then pip install pytest-cov coverage coveralls fi -pip install -e '.[plots]' +if [[ "$SDIST" == "true" ]]; then + python setup.py sdist + pip install twine +else + pip install -e '.[plots]' +fi python --version python -c "import numpy; print('numpy %s' % numpy.__version__)" python -c "import scipy; print('scipy %s' % scipy.__version__)" diff --git a/build_tools/travis/test_docs.sh b/build_tools/travis/test_docs.sh new file mode 100644 index 000000000..3df03926f --- /dev/null +++ b/build_tools/travis/test_docs.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +set -e +set -x +if [[ "$SDIST" != "true" ]]; then + make test-doc +fi diff --git a/build_tools/travis/test_script.sh b/build_tools/travis/test_script.sh new file mode 100644 index 000000000..42eb17249 --- /dev/null +++ b/build_tools/travis/test_script.sh @@ -0,0 +1,68 @@ +#!/bin/bash +# +# Copied from https://github.com/scikit-learn/scikit-learn/blob/master/build_tools/travis/test_script.sh +# The scikit-learn developers. +# License: BSD-style +# +# This script is meant to be called by the "script" step defined in +# .travis.yml. See https://docs.travis-ci.com/ for more details. +# The behavior of the script is controlled by environment variabled defined +# in the .travis.yml in the top level folder of the project. + +# License: 3-clause BSD + +set -e + +python --version +python -c "import numpy; print('numpy %s' % numpy.__version__)" +python -c "import scipy; print('scipy %s' % scipy.__version__)" +python -c "import sklearn; print('sklearn %s' % sklearn.__version__)" +python -c "\ +try: + import skopt + print('skopt %s' % skopt.__version__) +except ImportError: + pass +" +python -c "import multiprocessing as mp; print('%d CPUs' % mp.cpu_count())" + +run_tests() { + TEST_CMD="pytest --showlocals --durations=20 --pyargs" + + # Get into a temp directory to run test from the installed scikit-learn and + # check if we do not leave artifacts + mkdir -p $TEST_DIR + # We need the setup.cfg for the pytest settings + cp setup.cfg $TEST_DIR + cd $TEST_DIR + + # Skip tests that require large downloads over the network to save bandwidth + # usage as travis workers are stateless and therefore traditional local + # disk caching does not work. + export SKOPT_SKIP_NETWORK_TESTS=1 + + if [[ "$COVERAGE" == "true" ]]; then + TEST_CMD="$TEST_CMD --cov skopt" + fi + + if [[ -n "$CHECK_WARNINGS" ]]; then + TEST_CMD="$TEST_CMD -Werror::DeprecationWarning -Werror::FutureWarning" + fi + + set -x # print executed commands to the terminal + + $TEST_CMD skopt +} + +run_package_check() { + + TEST_CMD="twine check dist/*" + set -x + $TEST_CMD +} + +if [[ "$SDIST" == "true" ]]; then + run_package_check +else + run_tests +fi \ No newline at end of file diff --git a/conftest.py b/conftest.py new file mode 100644 index 000000000..f2a991049 --- /dev/null +++ b/conftest.py @@ -0,0 +1,83 @@ +# Even if empty this file is useful so that when running from the root folder +# ./sklearn is added to sys.path by pytest. See +# https://docs.pytest.org/en/latest/pythonpath.html for more details. 
For +# example, this allows to build extensions in place and run pytest +# doc/modules/clustering.rst and use sklearn from the local folder rather than +# the one from site-packages. + +import platform +import sys +from distutils.version import LooseVersion +import os + +import pytest +from _pytest.doctest import DoctestItem +from skopt import _IS_32BIT + + +PYTEST_MIN_VERSION = '3.3.0' + +if LooseVersion(pytest.__version__) < PYTEST_MIN_VERSION: + raise ImportError('Your version of pytest is too old, you should have ' + 'at least pytest >= {} installed.' + .format(PYTEST_MIN_VERSION)) + + +def pytest_addoption(parser): + parser.addoption("--skip-network", action="store_true", default=False, + help="skip network tests") + + +def pytest_collection_modifyitems(config, items): + # FeatureHasher is not compatible with PyPy + if platform.python_implementation() == 'PyPy': + skip_marker = pytest.mark.skip( + reason='FeatureHasher is not compatible with PyPy') + for item in items: + if item.name.endswith(('_hash.FeatureHasher', + 'text.HashingVectorizer')): + item.add_marker(skip_marker) + + # Skip tests which require internet if the flag is provided + if config.getoption("--skip-network"): + skip_network = pytest.mark.skip( + reason="test requires internet connectivity") + for item in items: + if "network" in item.keywords: + item.add_marker(skip_network) + + # numpy changed the str/repr formatting of numpy arrays in 1.14. We want to + # run doctests only for numpy >= 1.14. + skip_doctests = False + try: + import numpy as np + if LooseVersion(np.__version__) < LooseVersion('1.14'): + reason = 'doctests are only run for numpy >= 1.14' + skip_doctests = True + elif _IS_32BIT: + reason = ('doctest are only run when the default numpy int is ' + '64 bits.') + skip_doctests = True + elif sys.platform.startswith("win32"): + reason = ("doctests are not run for Windows because numpy arrays " + "repr is inconsistent across platforms.") + skip_doctests = True + except ImportError: + pass + + if skip_doctests: + skip_marker = pytest.mark.skip(reason=reason) + + for item in items: + if isinstance(item, DoctestItem): + item.add_marker(skip_marker) + + +def pytest_configure(config): + import sys + sys._is_pytest_session = True + + +def pytest_unconfigure(config): + import sys + del sys._is_pytest_session diff --git a/doc/Makefile b/doc/Makefile index a0e8bf588..73e661410 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -1,22 +1,110 @@ -# Minimal makefile for Sphinx documentation +# Makefile for Sphinx documentation # # You can set these variables from the command line. -SPHINXOPTS = -W --keep-going -SPHINXBUILD = sphinx-build -SPHINXPROJ = scikit-optimize -SOURCEDIR = source -BUILDDIR = build +SPHINXOPTS = -j auto +SPHINXBUILD ?= sphinx-build +PAPER = +BUILDDIR = _build +ifneq ($(EXAMPLES_PATTERN),) + EXAMPLES_PATTERN_OPTS := -D sphinx_gallery_conf.filename_pattern="$(EXAMPLES_PATTERN)" +endif + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -T -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS)\ + $(EXAMPLES_PATTERN_OPTS) . + + +.PHONY: help clean html dirhtml pickle json latex latexpdf changes linkcheck doctest optipng + +all: html-noplot -# Put it first so that "make" without argument is like "make help". 
help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + @echo "Please use \`make ' where is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + +clean: + -rm -rf $(BUILDDIR)/* + -rm -rf auto_examples/ + -rm -rf generated/* + -rm -rf modules/generated/ + +html: + # These two lines make the build a bit more lengthy, and the + # the embedding of images more robust + rm -rf $(BUILDDIR)/html/_images + #rm -rf _build/doctrees/ + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html/stable + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html/stable" + +html-noplot: + $(SPHINXBUILD) -D plot_gallery=0 -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html/stable + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html/stable." + +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." + +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + make -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." + +# download-data: +# python -c "from sklearn.datasets._lfw import _check_fetch_lfw; _check_fetch_lfw()" -.PHONY: help Makefile +# Optimize PNG files. Needs OptiPNG. Change the -P argument to the number of +# cores you have available, so -P 64 if you have a real computer ;) +optipng: + find _build auto_examples */generated -name '*.png' -print0 \ + | xargs -0 -n 1 -P 4 optipng -o10 -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
-%: Makefile - mkdir ./source/notebooks - cp -r ../examples/* ./source/notebooks - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) +dist: html latexpdf + cp _build/latex/scikit-optimize.pdf _build/html/stable/_downloads/scikit-optimize-docs.pdf diff --git a/doc/conf.py b/doc/conf.py index 780b5dbd8..370802f18 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -15,33 +15,33 @@ # import os # import sys # sys.path.insert(0, os.path.abspath('.')) - import warnings import os import re +from packaging.version import parse # import pkg_resources import sys import skopt sys.path.insert(0, os.path.abspath('sphinxext')) - from github_link import make_linkcode_resolve import sphinx_gallery -__version__ = ".".join(skopt.__version__.split(".")[:2]) + # __version__ = pkg_resources.get_distribution('skopt').version on_rtd = os.environ.get('READTHEDOCS', None) == 'True' # -- Project information ----------------------------------------------------- project = 'scikit-optimize' -copyright = '2017 - 2020, The scikit-optimize contributors.' +copyright = '2017 - 2020, scikit-optimize contributors (BSD License)' author = 'The scikit-optimize contributors' # The short X.Y version -version = __version__ +version = parse(skopt.__version__).base_version +version = ".".join(version.split(".")[:2]) # The full version, including alpha/beta/rc tags -release = __version__ +release = skopt.__version__ # -- General configuration --------------------------------------------------- @@ -308,7 +308,8 @@ def __call__(self, directory): # thumbnails for the front page of the scikit-learn home page. # key: first image in set # values: (number of plot in set, height of thumbnail) -carousel_thumbs = {'sphx_glr_plot_ask-and-tell_002.png': 600, +carousel_thumbs = {'sphx_glr_sklearn-gridsearchcv-replacement_001.png': 600, + 'sphx_glr_plot_ask-and-tell_002.png': 600, 'sphx_glr_bayesian-optimization_004.png': 600, 'sphx_glr_strategy-comparison_002.png': 600, 'sphx_glr_visualizing-results_008.png': 600} diff --git a/doc/conftest.py b/doc/conftest.py new file mode 100644 index 000000000..a52e06264 --- /dev/null +++ b/doc/conftest.py @@ -0,0 +1,12 @@ +import os +from os.path import exists +from os.path import join +import warnings + +import numpy as np + +from skopt import IS_PYPY + + +def pytest_runtest_setup(item): + fname = item.fspath.strpath diff --git a/doc/contents.rst b/doc/contents.rst index 93b93c808..9ee878ddd 100644 --- a/doc/contents.rst +++ b/doc/contents.rst @@ -14,7 +14,9 @@ Table Of Contents .. toctree:: :maxdepth: 2 + preface getting_started user_guide auto_examples/index modules/classes + development diff --git a/doc/development.rst b/doc/development.rst index 59bde45fa..d756fdbea 100644 --- a/doc/development.rst +++ b/doc/development.rst @@ -4,7 +4,7 @@ Development The library is still experimental and under heavy development. Checkout the `next -milestone `__ +milestone `__ for the plans for the next release or look at some `easy issues `__ to get started contributing. diff --git a/doc/getting_started.rst b/doc/getting_started.rst index 39379ef45..4836c0758 100644 --- a/doc/getting_started.rst +++ b/doc/getting_started.rst @@ -1,8 +1,10 @@ -.. currentmodule:: skopt =============== Getting started =============== + +.. currentmodule:: skopt + Scikit-Optimize, or ``skopt``, is a simple and efficient library to minimize (very) expensive and noisy black-box functions. It implements several methods for sequential model-based optimization. 
``skopt`` aims @@ -27,27 +29,29 @@ Finding a minimum Find the minimum of the noisy function ``f(x)`` over the range ``-2 < x < 2`` with :class:`skopt`:: - import numpy as np - from skopt import gp_minimize - - def f(x): - return (np.sin(5 * x[0]) * (1 - np.tanh(x[0] ** 2)) * - np.random.randn() * 0.1) - - res = gp_minimize(f, [(-2.0, 2.0)]) + >>> import numpy as np + >>> from skopt import gp_minimize + >>> np.random.seed(123) + >>> def f(x): + ... return (np.sin(5 * x[0]) * (1 - np.tanh(x[0] ** 2)) * + ... np.random.randn() * 0.1) + >>> + >>> res = gp_minimize(f, [(-2.0, 2.0)], n_calls=20) + >>> print("x*=%.2f f(x*)=%.2f" % (res.x[0], res.fun)) + x*=0.85 f(x*)=-0.06 For more control over the optimization loop you can use the :class:`skopt.Optimizer` class:: - from skopt import Optimizer - - opt = Optimizer([(-2.0, 2.0)]) - - for i in range(20): - suggested = opt.ask() - y = f(suggested) - opt.tell(suggested, y) - print('iteration:', i, suggested, y) + >>> from skopt import Optimizer + >>> opt = Optimizer([(-2.0, 2.0)]) + >>> + >>> for i in range(20): + ... suggested = opt.ask() + ... y = f(suggested) + ... res = opt.tell(suggested, y) + >>> print("x*=%.2f f(x*)=%.2f" % (res.x[0], res.fun)) + x*=0.27 f(x*)=-0.15 For more, read our :ref:`sphx_glr_auto_examples_bayesian-optimization.py` and the other `examples `_. diff --git a/doc/install.rst b/doc/install.rst index 620ad836b..2744eb7b0 100644 --- a/doc/install.rst +++ b/doc/install.rst @@ -4,7 +4,14 @@ Installation ============ -scikit-optimize supports Python 3.5 or newer. +scikit-optimize requires: + +* Python >= 3.6 +* NumPy (>= 1.13.3) +* SciPy (>= 0.19.1) +* joblib (>= 0.11) +* scikit-learn >= 0.20 +* matplotlib >= 2.0.0 The newest release can be installed via pip: diff --git a/doc/modules/acquisition.rst b/doc/modules/acquisition.rst index 3ecf2e116..5332833b8 100644 --- a/doc/modules/acquisition.rst +++ b/doc/modules/acquisition.rst @@ -4,3 +4,48 @@ Acquisition =========== +Function to minimize over the posterior distribution. + +:class:`gaussian_lcb` +--------------------- +Use the lower confidence bound to estimate the acquisition +values. + +The trade-off between exploitation and exploration is left to +be controlled by the user through the parameter ``kappa``. + +:class:`gaussian_pi` +-------------------- +Use the probability of improvement to calculate the acquisition values. + +The conditional probability `P(y=f(x) | x)` forms a Gaussian with a +certain mean and standard deviation approximated by the model. + +The PI condition is derived by computing ``E[u(f(x))]`` +where ``u(f(x)) = 1`` if ``f(x) < y_opt`` and ``u(f(x)) = 0`` +if ``f(x) > y_opt``. + +This means that the PI condition does not take into account how much better the +predictions are than the previous values, since it gives an equal reward +to all of them. + +Note that the value returned by this function should be maximized to +obtain the ``X`` with maximum improvement. + + +:class:`gaussian_ei` +-------------------- +Use the expected improvement to calculate the acquisition values. + +The conditional probability `P(y=f(x) | x)` forms a Gaussian with a certain +mean and standard deviation approximated by the model. + +The EI condition is derived by computing ``E[u(f(x))]`` +where ``u(f(x)) = 0`` if ``f(x) > y_opt`` and ``u(f(x)) = y_opt - f(x)`` +if ``f(x) < y_opt``. + +This solves one of the issues of the PI condition by giving a reward +proportional to the amount of improvement gained. + +Note that the value returned by this function should be maximized to +obtain the ``X`` with maximum improvement.
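The three acquisition rules above can also be written directly in terms of a surrogate's posterior mean ``mu`` and standard deviation ``sigma``. The following is a standalone numeric sketch for the minimization case; it is not the patch's code and not skopt's call signatures (skopt's own implementations live in ``skopt.acquisition``)::

    # Sketch of LCB, PI and EI for minimization, assuming a Gaussian posterior
    # with mean `mu` and standard deviation `sigma` at a candidate point;
    # `y_opt` is the best objective value observed so far.
    from scipy.stats import norm


    def lcb(mu, sigma, kappa=1.96):
        # Lower confidence bound: smaller values are more promising.
        return mu - kappa * sigma


    def probability_of_improvement(mu, sigma, y_opt, xi=0.01):
        # P(f(x) < y_opt - xi); every improvement is rewarded equally.
        return norm.cdf((y_opt - xi - mu) / sigma)


    def expected_improvement(mu, sigma, y_opt, xi=0.01):
        # E[max(y_opt - xi - f(x), 0)]; larger improvements earn larger rewards.
        z = (y_opt - xi - mu) / sigma
        return (y_opt - xi - mu) * norm.cdf(z) + sigma * norm.pdf(z)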
diff --git a/doc/modules/bayessearchcv.rst b/doc/modules/bayessearchcv.rst index 1a2b1f383..dfa402c12 100644 --- a/doc/modules/bayessearchcv.rst +++ b/doc/modules/bayessearchcv.rst @@ -1,3 +1,5 @@ +.. currentmodule:: skopt + .. _bayessearchcv: BayesSearchCV, a GridSearchCV compatible estimator @@ -5,3 +7,17 @@ BayesSearchCV, a GridSearchCV compatible estimator Use ``BayesSearchCV`` as a replacement for scikit-learn's GridSearchCV. +BayesSearchCV implements a "fit" and a "score" method. +It also implements "predict", "predict_proba", "decision_function", +"transform" and "inverse_transform" if they are implemented in the +estimator used. + +The parameters of the estimator used to apply these methods are optimized +by cross-validated search over parameter settings. + +In contrast to GridSearchCV, not all parameter values are tried out, but +rather a fixed number of parameter settings is sampled from the specified +distributions. The number of parameter settings that are tried is +given by ``n_iter``. + +Parameters are presented as a list of :class:`skopt.space.Dimension` objects. \ No newline at end of file
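A short usage sketch of the search described above; the estimator, the dimensions and the budget below are made-up choices for illustration and are not part of this patch::

    # Tune an SVC with BayesSearchCV; n_iter controls how many parameter
    # settings are sampled from the given dimensions.
    from sklearn.datasets import load_iris
    from sklearn.svm import SVC
    from skopt import BayesSearchCV
    from skopt.space import Categorical, Real

    X, y = load_iris(return_X_y=True)

    search = BayesSearchCV(
        SVC(),
        {
            "C": Real(1e-3, 1e3, prior="log-uniform"),
            "gamma": Real(1e-4, 1e1, prior="log-uniform"),
            "kernel": Categorical(["rbf", "poly"]),
        },
        n_iter=16,
        cv=3,
        random_state=0,
    )
    search.fit(X, y)  # "fit" drives the cross-validated search
    print(search.best_params_, search.best_score_)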
diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index a43b295ef..5d364e38c 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -33,6 +33,7 @@ Functions dummy_minimize dump expected_minimum + expected_minimum_random_sampling forest_minimize gbrt_minimize gp_minimize @@ -187,9 +188,14 @@ details. :template: function.rst plots.partial_dependence + plots.partial_dependence_1D + plots.partial_dependence_2D plots.plot_convergence plots.plot_evaluations + plots.plot_gaussian_process plots.plot_objective + plots.plot_objective_2D + plots.plot_histogram plots.plot_regret .. _utils_ref: @@ -211,14 +217,38 @@ details. :template: function.rst utils.cook_estimator + utils.cook_initial_point_generator utils.dimensions_aslist utils.expected_minimum + utils.expected_minimum_random_sampling utils.dump utils.load utils.point_asdict utils.point_aslist utils.use_named_args +.. _sampler_ref: + +:mod:`skopt.sampler`: Samplers +============================== + +.. automodule:: skopt.sampler + :no-members: + :no-inherited-members: + +**User guide:** See the :ref:`sampler` section for further details. + +.. currentmodule:: skopt + +.. autosummary:: + :toctree: generated/ + :template: class.rst + + sampler.Lhs + sampler.Sobol + sampler.Halton + sampler.Hammersly + .. _space_ref: @@ -272,5 +302,7 @@ details. space.transformers.Normalize space.transformers.Pipeline space.transformers.Transformer + space.transformers.LabelEncoder + space.transformers.StringEncoder diff --git a/doc/modules/minimize_functions.rst b/doc/modules/minimize_functions.rst index 0e738a115..1b05cd551 100644 --- a/doc/modules/minimize_functions.rst +++ b/doc/modules/minimize_functions.rst @@ -8,7 +8,42 @@ These are easy to get started with. They mirror the ``scipy.optimize`` API and provide a high level interface to various pre-configured optimizers. -* :class:`dummy_minimize` -* :class:`forest_minimize` -* :class:`gbrt_minimize` -* :class:`gp_minimize` +:class:`dummy_minimize` +----------------------- +Random search by uniform sampling within the given bounds. + +:class:`forest_minimize` +------------------------ +Sequential optimization using decision trees. + +A tree-based regression model is used to model the expensive-to-evaluate +function `func`. The model is improved by sequentially evaluating +the expensive function at the next best point, thereby finding the +minimum of `func` with as few evaluations as possible. + +:class:`gbrt_minimize` +---------------------- +Sequential optimization using gradient boosted trees. + +Gradient boosted regression trees are used to model the (very) +expensive-to-evaluate function `func`. The model is improved +by sequentially evaluating the expensive function at the next +best point, thereby finding the minimum of `func` with as +few evaluations as possible. + +:class:`gp_minimize` +-------------------- +Bayesian optimization using Gaussian Processes. + +If every function evaluation is expensive, for instance +when the parameters are the hyperparameters of a neural network +and the function evaluation is the mean cross-validation score across +ten folds, optimizing the hyperparameters by standard optimization +routines would take forever! + +The idea is to approximate the function using a Gaussian process. +In other words, the function values are assumed to follow a multivariate +Gaussian. The covariance of the function values is given by a +GP kernel between the parameters. A smart choice for the +next parameter to evaluate can then be made with the acquisition function +over the Gaussian prior, which is much quicker to evaluate.
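The four functions above share the same calling convention, so a sketch written for one of them carries over to the others; the toy objective and the call budget are invented for illustration::

    # Same 1-D toy problem solved with two of the optimizers described above;
    # only the surrogate model behind the suggestions differs.
    import numpy as np
    from skopt import forest_minimize, gp_minimize


    def objective(x):
        # Stand-in for an expensive-to-evaluate function of one real parameter.
        return float(np.sin(5 * x[0]) * (1 - np.tanh(x[0] ** 2)))


    bounds = [(-2.0, 2.0)]
    res_gp = gp_minimize(objective, bounds, n_calls=20, random_state=0)
    res_forest = forest_minimize(objective, bounds, n_calls=20, random_state=0)
    print(res_gp.x, res_gp.fun)
    print(res_forest.x, res_forest.fun)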
diff --git a/doc/modules/plots.rst b/doc/modules/plots.rst index d33c3f65f..77f8b3ee0 100644 --- a/doc/modules/plots.rst +++ b/doc/modules/plots.rst @@ -1,13 +1,39 @@ -.. currentmodule:: skopt.plots .. _plots: +============== Plotting tools ============== -Plotting functions. -* :class:`partial_dependence` -* :class:`plot_convergence` -* :class:`plot_evaluations` -* :class:`plot_objective` -* :class:`plot_regret` \ No newline at end of file +.. currentmodule:: skopt.plots + +Plotting functions can be used to visualize the optimization process. + +plot_convergence +================ +:class:`plot_convergence` plots one or several convergence traces. + +.. figure:: ../auto_examples/images/sphx_glr_hyperparameter-optimization_001.png + :target: ../auto_examples/hyperparameter-optimization.html + :align: center + +plot_evaluations +================ +:class:`plot_evaluations` visualizes the order in which points were sampled. + +.. figure:: ../auto_examples/plots/images/sphx_glr_visualizing-results_002.png + :target: ../auto_examples/plots/visualizing-results.html + :align: center + +plot_objective +============== +:class:`plot_objective` creates pairwise dependence plots of the objective function. + +.. figure:: ../auto_examples/plots/images/sphx_glr_partial-dependence-plot_001.png + :target: ../auto_examples/plots/partial-dependence-plot.html + :align: center + + +plot_regret +=========== +:class:`plot_regret` plots one or several cumulative regret traces. diff --git a/doc/modules/sampler.rst b/doc/modules/sampler.rst new file mode 100644 index 000000000..eeb151f6d --- /dev/null +++ b/doc/modules/sampler.rst @@ -0,0 +1,6 @@ +.. currentmodule:: skopt.sampler + +.. _sampler: + +Sampling methods +================ diff --git a/doc/modules/space.rst b/doc/modules/space.rst index 4525732d6..9eac0857c 100644 --- a/doc/modules/space.rst +++ b/doc/modules/space.rst @@ -2,6 +2,18 @@ .. _space: -Space define the optimization space -=================================== +Space +===== +:class:`Space` defines the optimization space, which contains one or more dimensions of the following types: +:class:`Real` +------------- +Search space dimension that can take on any real value. + +:class:`Integer` +---------------- +Search space dimension that can take on integer values. + +:class:`Categorical` +-------------------- +Search space dimension that can take on categorical values. \ No newline at end of file
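A small sketch of a mixed space built from the three dimension types; the names and bounds are arbitrary choices for illustration::

    # One dimension of each type, gathered into a Space, then sampled to show
    # the kind of points an optimizer would be asked to evaluate.
    from skopt.space import Categorical, Integer, Real, Space

    space = Space([
        Real(1e-4, 1e-1, prior="log-uniform", name="learning_rate"),
        Integer(1, 10, name="max_depth"),
        Categorical(["relu", "tanh"], name="activation"),
    ])

    samples = space.rvs(n_samples=3, random_state=0)
    print(samples)  # three points, one value per dimension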

diff --git a/doc/modules/transformers.rst b/doc/modules/transformers.rst new file mode 100644 index 000000000..34693f287 --- /dev/null +++ b/doc/modules/transformers.rst @@ -0,0 +1,7 @@ +.. currentmodule:: skopt.space.transformers + +.. _transformers: + +Transformers +============ + diff --git a/doc/modules/utils.rst b/doc/modules/utils.rst index a7e7fa0dd..62134ca88 100644 --- a/doc/modules/utils.rst +++ b/doc/modules/utils.rst @@ -7,3 +7,31 @@ Utility functions This is a list of public utility functions. Other functions in this module are meant for internal use. +:func:`use_named_args` +---------------------- +This utility function makes it possible to use objective functions with named arguments:: + + >>> # Define the search-space dimensions. They must all have names! + >>> from skopt.space import Real + >>> from skopt.utils import use_named_args + >>> dim1 = Real(name='foo', low=0.0, high=1.0) + >>> dim2 = Real(name='bar', low=0.0, high=1.0) + >>> dim3 = Real(name='baz', low=0.0, high=1.0) + >>> + >>> # Gather the search-space dimensions in a list. + >>> dimensions = [dim1, dim2, dim3] + >>> + >>> # Define the objective function with named arguments + >>> # and use this function-decorator to specify the + >>> # search-space dimensions. + >>> @use_named_args(dimensions=dimensions) + ... def my_objective_function(foo, bar, baz): + ... return foo ** 2 + bar ** 4 + baz ** 8 + +:func:`dump` +------------ +Store a skopt optimization result into a file. + +:func:`load` +------------ +Reconstruct a skopt optimization result from a file persisted with :func:`dump`. diff --git a/doc/preface.rst b/doc/preface.rst new file mode 100644 index 000000000..c80f619cd --- /dev/null +++ b/doc/preface.rst @@ -0,0 +1,25 @@ +.. This helps define the TOC ordering for "about us" sections. Particularly + useful for PDF output as this section is not linked from elsewhere. + +.. Places global toc into the sidebar + +:globalsidebartoc: True + +.. _preface_menu: + +.. include:: includes/big_toc_css.rst +.. include:: tune_toc.rst + +========================== +Welcome to scikit-optimize +========================== + +| + +.. toctree:: + :maxdepth: 2 + + install + whats_new + +| \ No newline at end of file diff --git a/doc/requirements.txt b/doc/requirements.txt index 1ab1b285d..4713469ee 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -4,5 +4,5 @@ matplotlib pandas ipywidgets sphinx -sphinx-gallery +sphinx-gallery>=0.6 numpydoc diff --git a/doc/templates/index.html b/doc/templates/index.html index 8244df212..7aff01909 100644 --- a/doc/templates/index.html +++ b/doc/templates/index.html
@@ -8,7 +8,7 @@ [HTML markup stripped during extraction; context shows the masthead text "scikit-optimize" and "Sequential model-based optimization in Python", and the hunk retargets the "What's New in {{ release }}" link between "Getting Started" and "GitHub"]
@@ -35,6 +35,7 @@ [markup stripped; one line added near the "Sequential model-based" blurb]
@@ -55,11 +56,11 @@ [markup stripped; the "Visualizing optimization results" entry is updated]
@@ -106,10 +107,13 @@ News [markup stripped; the "On-going development: What's new (Changelog)" link is retargeted and the release list is refreshed]
-   • Feb 2020. scikit-optimize 0.7.1 (Changelog).
-   • Jan 2020. scikit-optimize 0.7 (Changelog).
+   • Sep 2020. scikit-optimize 0.8.1 (Changelog).
+   • Sep 2020. scikit-optimize 0.8 (Changelog).
+   • Feb 2020. scikit-optimize 0.7.2 (Changelog).
+   • Feb 2020. scikit-optimize 0.7.1 (Changelog).
+   • Jan 2020. scikit-optimize 0.7 (Changelog).
  • April 2018. scikit-optimize 0.6 (Changelog).
  • Mar 2018. scikit-optimize 0.5 (Changelog).
  • Aug 2017. scikit-optimize 0.4 (Changelog). diff --git a/doc/themes/scikit-learn-modern/javascript.html b/doc/themes/scikit-learn-modern/javascript.html index f62c657d1..4d9685653 100644 --- a/doc/themes/scikit-learn-modern/javascript.html +++ b/doc/themes/scikit-learn-modern/javascript.html @@ -10,7 +10,7 @@