diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py index 35ed4c515dd81..bb73b3ad3e22f 100644 --- a/maint_tools/test_docstrings.py +++ b/maint_tools/test_docstrings.py @@ -23,7 +23,6 @@ "PassiveAggressiveClassifier", "PassiveAggressiveRegressor", "QuadraticDiscriminantAnalysis", - "SelfTrainingClassifier", "SparseRandomProjection", "SpectralBiclustering", "SpectralCoclustering", diff --git a/sklearn/semi_supervised/_self_training.py b/sklearn/semi_supervised/_self_training.py index ad627c6f98574..71d2a7b32236b 100644 --- a/sklearn/semi_supervised/_self_training.py +++ b/sklearn/semi_supervised/_self_training.py @@ -37,30 +37,30 @@ class SelfTrainingClassifier(MetaEstimatorMixin, BaseEstimator): Parameters ---------- base_estimator : estimator object - An estimator object implementing ``fit`` and ``predict_proba``. - Invoking the ``fit`` method will fit a clone of the passed estimator, - which will be stored in the ``base_estimator_`` attribute. + An estimator object implementing `fit` and `predict_proba`. + Invoking the `fit` method will fit a clone of the passed estimator, + which will be stored in the `base_estimator_` attribute. threshold : float, default=0.75 The decision threshold for use with `criterion='threshold'`. - Should be in [0, 1). When using the 'threshold' criterion, a + Should be in [0, 1). When using the `'threshold'` criterion, a :ref:`well calibrated classifier <calibration>` should be used. criterion : {'threshold', 'k_best'}, default='threshold' The selection criterion used to select which labels to add to the - training set. If 'threshold', pseudo-labels with prediction - probabilities above `threshold` are added to the dataset. If 'k_best', + training set. If `'threshold'`, pseudo-labels with prediction - probabilities above `threshold` are added to the dataset. If `'k_best'`, the `k_best` pseudo-labels with highest prediction probabilities are added to the dataset. 
When using the 'threshold' criterion, a :ref:`well calibrated classifier <calibration>` should be used. k_best : int, default=10 The amount of samples to add in each iteration. Only used when - `criterion` is k_best'. + `criterion='k_best'`. max_iter : int or None, default=10 Maximum number of iterations allowed. Should be greater than or equal - to 0. If it is ``None``, the classifier will continue to predict labels + to 0. If it is `None`, the classifier will continue to predict labels until no new pseudo-labels are added, or all unlabeled samples have been labeled. @@ -74,7 +74,7 @@ class SelfTrainingClassifier(MetaEstimatorMixin, BaseEstimator): classes_ : ndarray or list of ndarray of shape (n_classes,) Class labels for each output. (Taken from the trained - ``base_estimator_``). + `base_estimator_`). transduction_ : ndarray of shape (n_samples,) The labels used for the final fit of the classifier, including @@ -104,11 +104,24 @@ class SelfTrainingClassifier(MetaEstimatorMixin, BaseEstimator): termination_condition_ : {'max_iter', 'no_change', 'all_labeled'} The reason that fitting was stopped. - - 'max_iter': `n_iter_` reached `max_iter`. - - 'no_change': no new labels were predicted. - - 'all_labeled': all unlabeled samples were labeled before `max_iter` + - `'max_iter'`: `n_iter_` reached `max_iter`. + - `'no_change'`: no new labels were predicted. + - `'all_labeled'`: all unlabeled samples were labeled before `max_iter` was reached. + See Also + -------- + LabelPropagation : Label propagation classifier. + LabelSpreading : Label spreading model for semi-supervised learning. + + References + ---------- + David Yarowsky. 1995. Unsupervised word sense disambiguation rivaling + supervised methods. In Proceedings of the 33rd annual meeting on + Association for Computational Linguistics (ACL '95). Association for + Computational Linguistics, Stroudsburg, PA, USA, 189-196. 
DOI: + https://doi.org/10.3115/981658.981684 + Examples -------- >>> import numpy as np @@ -123,14 +136,6 @@ class SelfTrainingClassifier(MetaEstimatorMixin, BaseEstimator): >>> self_training_model = SelfTrainingClassifier(svc) >>> self_training_model.fit(iris.data, iris.target) SelfTrainingClassifier(...) - - References - ---------- - David Yarowsky. 1995. Unsupervised word sense disambiguation rivaling - supervised methods. In Proceedings of the 33rd annual meeting on - Association for Computational Linguistics (ACL '95). Association for - Computational Linguistics, Stroudsburg, PA, USA, 189-196. DOI: - https://doi.org/10.3115/981658.981684 """ _estimator_type = "classifier" @@ -153,7 +158,7 @@ def __init__( def fit(self, X, y): """ - Fits this ``SelfTrainingClassifier`` to a dataset. + Fit self-training classifier using `X`, `y` as training data. Parameters ---------- @@ -167,7 +172,7 @@ def fit(self, X, y): Returns ------- self : object - Returns an instance of self. + Fitted estimator. """ # we need row slicing support for sparce matrices, but costly finiteness check # can be delegated to the base estimator. @@ -281,7 +286,7 @@ def fit(self, X, y): @if_delegate_has_method(delegate="base_estimator") def predict(self, X): - """Predict the classes of X. + """Predict the classes of `X`. Parameters ---------- @@ -326,7 +331,7 @@ def predict_proba(self, X): @if_delegate_has_method(delegate="base_estimator") def decision_function(self, X): - """Calls decision function of the `base_estimator`. + """Call decision function of the `base_estimator`. Parameters ---------- @@ -372,7 +377,7 @@ def predict_log_proba(self, X): @if_delegate_has_method(delegate="base_estimator") def score(self, X, y): - """Calls score on the `base_estimator`. + """Call score on the `base_estimator`. Parameters ----------