From a027442e624b93b68337ed4b7e9ec644326f5123 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Thu, 23 Feb 2017 20:30:21 +1100 Subject: [PATCH 1/4] DOC describe scikit-learn-contrib in related projects --- doc/related_projects.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/doc/related_projects.rst b/doc/related_projects.rst index 29e0a3337e4ba..e087ee99c0b06 100644 --- a/doc/related_projects.rst +++ b/doc/related_projects.rst @@ -4,6 +4,13 @@ Related Projects ===================================== +Projects implementing the scikit-learn estimator API are encouraged to use +the `scikit-learn-contrib template `_ +which encourages best practices for testing and documenting estimators. +The `scikit-learn-contrib GitHub organisation `_ +also accepts high-quality contributions of repositories conforming to this +template. + Below is a list of sister-projects, extensions and domain specific packages. Interoperability and framework enhancements From 3f39fc358ac1d0e26962e533a1a811c8f66e29f2 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Thu, 23 Feb 2017 20:47:19 +1100 Subject: [PATCH 2/4] DOC More on project template in contributor docs --- doc/developers/contributing.rst | 108 +++++++++++++++++++------------- 1 file changed, 65 insertions(+), 43 deletions(-) diff --git a/doc/developers/contributing.rst b/doc/developers/contributing.rst index 8809023d54184..c5a5edbd81c05 100644 --- a/doc/developers/contributing.rst +++ b/doc/developers/contributing.rst @@ -957,49 +957,71 @@ adheres to the scikit-learn interface and standards by running >>> check_estimator(LinearSVC) # passes The main motivation to make a class compatible to the scikit-learn estimator -interface might be that you want to use it together with model assessment and -selection tools such as :class:`model_selection.GridSearchCV`. - -For this to work, you need to implement the following interface. 
-If a dependency on scikit-learn is okay for your code, -you can prevent a lot of boilerplate code -by deriving a class from ``BaseEstimator`` -and optionally the mixin classes in ``sklearn.base``. -E.g., below is a custom classifier. For more information on this example, see -`scikit-learn-contrib `_:: - - >>> import numpy as np - >>> from sklearn.base import BaseEstimator, ClassifierMixin - >>> from sklearn.utils.validation import check_X_y, check_array, check_is_fitted - >>> from sklearn.utils.multiclass import unique_labels - >>> from sklearn.metrics import euclidean_distances - >>> class TemplateClassifier(BaseEstimator, ClassifierMixin): - ... - ... def __init__(self, demo_param='demo'): - ... self.demo_param = demo_param - ... - ... def fit(self, X, y): - ... - ... # Check that X and y have correct shape - ... X, y = check_X_y(X, y) - ... # Store the classes seen during fit - ... self.classes_ = unique_labels(y) - ... - ... self.X_ = X - ... self.y_ = y - ... # Return the classifier - ... return self - ... - ... def predict(self, X): - ... - ... # Check is fit had been called - ... check_is_fitted(self, ['X_', 'y_']) - ... - ... # Input validation - ... X = check_array(X) - ... - ... closest = np.argmin(euclidean_distances(X, self.X_), axis=1) - ... return self.y_[closest] +interface might be that you want to use it together with model evaluation and +selection tools such as :class:`model_selection.GridSearchCV` and +:class:`pipeline.Pipeline`. + +For this to work, you need to implement the interface described below. + +.. topic:: Project template: + + We provide a `project template `_ + which helps in the creation of Python packages containing scikit-learn compatible estimators. 
+ It provides: + + * an initial git repository with Python package directory structure + * a template of a scikit-learn estimator + * an initial test suite including use of ``check_estimator`` + * directory structures and scripts to compile documentation and example + galleries + * scripts to manage continuous integration (testing on Linux and Windows) + * instructions from getting started to publishing on `PyPI `_ + +.. topic:: ``BaseEstimator`` and mixins: + + We tend to use "duck typing", so building an estimator which follows + the API suffices for compatibility, without needing to inherit from or + even import any scikit-learn classes. + + However, if a dependency on scikit-learn is acceptable in your code, + you can prevent a lot of boilerplate code + by deriving a class from ``BaseEstimator`` + and optionally the mixin classes in ``sklearn.base``. + E.g., below is a custom classifier. + `scikit-learn-contrib `_:: + + >>> import numpy as np + >>> from sklearn.base import BaseEstimator, ClassifierMixin + >>> from sklearn.utils.validation import check_X_y, check_array, check_is_fitted + >>> from sklearn.utils.multiclass import unique_labels + >>> from sklearn.metrics import euclidean_distances + >>> class TemplateClassifier(BaseEstimator, ClassifierMixin): + ... + ... def __init__(self, demo_param='demo'): + ... self.demo_param = demo_param + ... + ... def fit(self, X, y): + ... + ... # Check that X and y have correct shape + ... X, y = check_X_y(X, y) + ... # Store the classes seen during fit + ... self.classes_ = unique_labels(y) + ... + ... self.X_ = X + ... self.y_ = y + ... # Return the classifier + ... return self + ... + ... def predict(self, X): + ... + ... # Check if fit has been called + ... check_is_fitted(self, ['X_', 'y_']) + ... + ... # Input validation + ... X = check_array(X) + ... + ... closest = np.argmin(euclidean_distances(X, self.X_), axis=1) + ... 
return self.y_[closest] get_params and set_params From e99bb344a27be4472f10a5fad5c7d97217082fb2 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Thu, 23 Feb 2017 21:08:00 +1100 Subject: [PATCH 3/4] Word choice --- doc/related_projects.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/related_projects.rst b/doc/related_projects.rst index e087ee99c0b06..846bc470dfdb3 100644 --- a/doc/related_projects.rst +++ b/doc/related_projects.rst @@ -6,7 +6,7 @@ Related Projects Projects implementing the scikit-learn estimator API are encouraged to use the `scikit-learn-contrib template `_ -which encourages best practices for testing and documenting estimators. +which facilitates best practices for testing and documenting estimators. The `scikit-learn-contrib GitHub organisation `_ also accepts high-quality contributions of repositories conforming to this template. From 797f3fe0bd47ea555caa53aaad8de222453394d2 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Fri, 24 Feb 2017 12:58:49 +1100 Subject: [PATCH 4/4] DOC more structural clarity --- doc/developers/contributing.rst | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/doc/developers/contributing.rst b/doc/developers/contributing.rst index c5a5edbd81c05..d83650345b422 100644 --- a/doc/developers/contributing.rst +++ b/doc/developers/contributing.rst @@ -961,7 +961,8 @@ interface might be that you want to use it together with model evaluation and selection tools such as :class:`model_selection.GridSearchCV` and :class:`pipeline.Pipeline`. -For this to work, you need to implement the interface described below. +Before detailing the required interface below, we describe two ways to achieve +the correct interface more easily. .. topic:: Project template: @@ -987,8 +988,9 @@ For this to work, you need to implement the interface described below. you can prevent a lot of boilerplate code by deriving a class from ``BaseEstimator`` and optionally the mixin classes in ``sklearn.base``. 
- E.g., below is a custom classifier. - `scikit-learn-contrib `_:: + For example, below is a custom classifier, with more examples included + in the scikit-learn-contrib + `project template `_. >>> import numpy as np >>> from sklearn.base import BaseEstimator, ClassifierMixin