From c09572e2989016360415b078a7e49a4f37f98c9c Mon Sep 17 00:00:00 2001 From: Pinky-Chaudhary Date: Tue, 28 Sep 2021 12:44:30 +0530 Subject: [PATCH 1/5] fixed pytest errors for PLSCanonical --- maint_tools/test_docstrings.py | 1 - sklearn/cross_decomposition/_pls.py | 12 ++++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py index 4bd301d795937..6a9c6c83cfd23 100644 --- a/maint_tools/test_docstrings.py +++ b/maint_tools/test_docstrings.py @@ -23,7 +23,6 @@ "MultiTaskLassoCV", "OrthogonalMatchingPursuit", "OrthogonalMatchingPursuitCV", - "PLSCanonical", "PLSRegression", "PLSSVD", "PassiveAggressiveClassifier", diff --git a/sklearn/cross_decomposition/_pls.py b/sklearn/cross_decomposition/_pls.py index e36b79648d6c7..66dc4e74ab4a0 100644 --- a/sklearn/cross_decomposition/_pls.py +++ b/sklearn/cross_decomposition/_pls.py @@ -680,7 +680,7 @@ class PLSCanonical(_PLS): will compute the whole SVD. max_iter : int, default=500 - the maximum number of iterations of the power method when + The maximum number of iterations of the power method when `algorithm='nipals'`. Ignored otherwise. tol : float, default=1e-06 @@ -748,6 +748,11 @@ class PLSCanonical(_PLS): .. versionadded:: 1.0 + See Also + -------- + CCA : Canonical Correlation Analysis. + PLSSVD : Partial Least Square SVD. 
+ Examples -------- >>> from sklearn.cross_decomposition import PLSCanonical @@ -757,11 +762,6 @@ class PLSCanonical(_PLS): >>> plsca.fit(X, Y) PLSCanonical() >>> X_c, Y_c = plsca.transform(X, Y) - - See Also - -------- - CCA - PLSSVD """ # This implementation provides the same results that the "plspm" package From c751ce7ea1278c011fd229492878def4b5d43b0d Mon Sep 17 00:00:00 2001 From: Pinky-Chaudhary Date: Tue, 28 Sep 2021 17:03:01 +0530 Subject: [PATCH 2/5] Fix docs for LocalOutlierFactor --- sklearn/neighbors/_lof.py | 46 +++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/sklearn/neighbors/_lof.py b/sklearn/neighbors/_lof.py index 2c7435f1bdd10..a5541f57e66df 100644 --- a/sklearn/neighbors/_lof.py +++ b/sklearn/neighbors/_lof.py @@ -17,18 +17,18 @@ class LocalOutlierFactor(KNeighborsMixin, OutlierMixin, NeighborsBase): - """Unsupervised Outlier Detection using Local Outlier Factor (LOF) - - The anomaly score of each sample is called Local Outlier Factor. - It measures the local deviation of density of a given sample with - respect to its neighbors. - It is local in that the anomaly score depends on how isolated the object - is with respect to the surrounding neighborhood. - More precisely, locality is given by k-nearest neighbors, whose distance - is used to estimate the local density. - By comparing the local density of a sample to the local densities of - its neighbors, one can identify samples that have a substantially lower - density than their neighbors. These are considered outliers. + """Unsupervised Outlier Detection using the Local Outlier Factor (LOF). + + The anomaly score of each sample is called the Local Outlier Factor. + It measures the local deviation of the density of a given sample with respect + to its neighbors. + It is local in that the anomaly score depends on how isolated the object + is with respect to the surrounding neighborhood. 
+ More precisely, locality is given by k-nearest neighbors, whose distance + is used to estimate the local density. + By comparing the local density of a sample to the local densities of its + neighbors, one can identify samples that have a substantially lower density + than their neighbors. These are considered outliers. .. versionadded:: 0.19 @@ -52,13 +52,13 @@ class LocalOutlierFactor(KNeighborsMixin, OutlierMixin, NeighborsBase): this parameter, using brute force. leaf_size : int, default=30 - Leaf size passed to :class:`BallTree` or :class:`KDTree`. This can + The leaf size passed to :class:`BallTree` or :class:`KDTree`. This can affect the speed of the construction and query, as well as the memory required to store the tree. The optimal value depends on the nature of the problem. metric : str or callable, default='minkowski' - metric used for the distance computation. Any metric from scikit-learn + The metric is used for distance computation. Any metric from scikit-learn or scipy.spatial.distance can be used. If metric is "precomputed", X is assumed to be a distance matrix and @@ -84,7 +84,7 @@ class LocalOutlierFactor(KNeighborsMixin, OutlierMixin, NeighborsBase): See the documentation for scipy.spatial.distance for details on these metrics: - https://docs.scipy.org/doc/scipy/reference/spatial.distance.html + https://docs.scipy.org/doc/scipy/reference/spatial.distance.html. p : int, default=2 Parameter for the Minkowski metric from @@ -170,6 +170,15 @@ class LocalOutlierFactor(KNeighborsMixin, OutlierMixin, NeighborsBase): n_samples_fit_ : int It is the number of samples in the fitted data. + See also + ---------- + sklearn.svm.OneClassSVM: Unsupervised Outlier Detection. + + References + ---------- + .. [1] Breunig, M. M., Kriegel, H. P., Ng, R. T., & Sander, J. (2000, May). + LOF: identifying density-based local outliers. In ACM sigmod record. 
+ Examples -------- >>> import numpy as np @@ -180,11 +189,6 @@ class LocalOutlierFactor(KNeighborsMixin, OutlierMixin, NeighborsBase): array([ 1, 1, -1, 1]) >>> clf.negative_outlier_factor_ array([ -0.9821..., -1.0370..., -73.3697..., -0.9821...]) - - References - ---------- - .. [1] Breunig, M. M., Kriegel, H. P., Ng, R. T., & Sander, J. (2000, May). - LOF: identifying density-based local outliers. In ACM sigmod record. """ def __init__( @@ -223,7 +227,7 @@ def _check_novelty_fit_predict(self): @available_if(_check_novelty_fit_predict) def fit_predict(self, X, y=None): - """Fits the model to the training set X and returns the labels. + """Fit the model to the training set X and return the labels. **Not available for novelty detection (when novelty is set to True).** Label is 1 for an inlier and -1 for an outlier according to the LOF From 3e215753845f7876ceb3050265f21ef6c8679f06 Mon Sep 17 00:00:00 2001 From: Pinky-Chaudhary Date: Tue, 28 Sep 2021 17:04:28 +0530 Subject: [PATCH 3/5] Remove LocalOutlierFactor from DOCSTRING_IGNORE_LIST --- maint_tools/test_docstrings.py | 1 - 1 file changed, 1 deletion(-) diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py index 5a0c5b4150bb4..20bfffd28c122 100644 --- a/maint_tools/test_docstrings.py +++ b/maint_tools/test_docstrings.py @@ -12,7 +12,6 @@ "KNNImputer", "LabelPropagation", "LabelSpreading", - "LocalOutlierFactor", "LocallyLinearEmbedding", "MiniBatchKMeans", "MultiLabelBinarizer", From 990fd903367bf5d23ebef069b0e89b6ca37c322e Mon Sep 17 00:00:00 2001 From: Pinky Date: Wed, 29 Sep 2021 18:42:54 +0530 Subject: [PATCH 4/5] Update sklearn/neighbors/_lof.py Co-authored-by: Guillaume Lemaitre --- sklearn/neighbors/_lof.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/neighbors/_lof.py b/sklearn/neighbors/_lof.py index a5541f57e66df..6a699afb4f8cf 100644 --- a/sklearn/neighbors/_lof.py +++ b/sklearn/neighbors/_lof.py @@ -172,7 +172,8 @@ class 
LocalOutlierFactor(KNeighborsMixin, OutlierMixin, NeighborsBase): See also ---------- - sklearn.svm.OneClassSVM: Unsupervised Outlier Detection. + sklearn.svm.OneClassSVM: Unsupervised Outlier Detection using + Support Vector Machine. References ---------- From 88823230c0acf2ecec43d9b4a47dfc7629ba3775 Mon Sep 17 00:00:00 2001 From: Pinky Date: Wed, 29 Sep 2021 18:45:19 +0530 Subject: [PATCH 5/5] Update recommended changes --- sklearn/neighbors/_lof.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/sklearn/neighbors/_lof.py b/sklearn/neighbors/_lof.py index 6a699afb4f8cf..26bdb872f55e2 100644 --- a/sklearn/neighbors/_lof.py +++ b/sklearn/neighbors/_lof.py @@ -19,16 +19,16 @@ class LocalOutlierFactor(KNeighborsMixin, OutlierMixin, NeighborsBase): """Unsupervised Outlier Detection using the Local Outlier Factor (LOF). - The anomaly score of each sample is called the Local Outlier Factor. - It measures the local deviation of the density of a given sample with respect - to its neighbors. - It is local in that the anomaly score depends on how isolated the object - is with respect to the surrounding neighborhood. - More precisely, locality is given by k-nearest neighbors, whose distance - is used to estimate the local density. - By comparing the local density of a sample to the local densities of its - neighbors, one can identify samples that have a substantially lower density - than their neighbors. These are considered outliers. + The anomaly score of each sample is called the Local Outlier Factor. + It measures the local deviation of the density of a given sample with respect + to its neighbors. + It is local in that the anomaly score depends on how isolated the object + is with respect to the surrounding neighborhood. + More precisely, locality is given by k-nearest neighbors, whose distance + is used to estimate the local density. 
+ By comparing the local density of a sample to the local densities of its + neighbors, one can identify samples that have a substantially lower density + than their neighbors. These are considered outliers. .. versionadded:: 0.19