DOC Improve neighbors documentation (#16923) · viclafargue/scikit-learn@246fd19 · GitHub
[go: up one dir, main page]

Skip to content

Commit 246fd19

Browse files
haochunchang authored and viclafargue committed
DOC Improve neighbors documentation (scikit-learn#16923)
1 parent 1957e63 commit 246fd19

File tree

3 files changed

+56
-13
lines changed

3 files changed

+56
-13
lines changed

sklearn/neighbors/_binary_tree.pxi

Lines changed: 44 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -239,9 +239,10 @@ cdef NodeData_t[::1] get_memview_NodeData_1D(
239239
# Define doc strings, substituting the appropriate class name using
240240
# the DOC_DICT variable defined in the pyx files.
241241
CLASS_DOC = \
242-
"""{BinaryTree} for fast generalized N-point problems
242+
"""
243+
{BinaryTree}(X, leaf_size=40, metric='minkowski', **kwargs)
243244
244-
{BinaryTree}(X, leaf_size=40, metric='minkowski', \\**kwargs)
245+
{BinaryTree} for fast generalized N-point problems
245246
246247
Parameters
247248
----------
@@ -1159,15 +1160,50 @@ cdef class BinaryTree:
11591160
self._update_memviews()
11601161

11611162
def get_tree_stats(self):
1163+
"""
1164+
get_tree_stats(self)
1165+
1166+
Get tree status.
1167+
1168+
Returns
1169+
-------
1170+
tree_stats: tuple of int
1171+
(number of trims, number of leaves, number of splits)
1172+
"""
11621173
return (self.n_trims, self.n_leaves, self.n_splits)
11631174

11641175
def reset_n_calls(self):
1176+
"""
1177+
reset_n_calls(self)
1178+
1179+
Reset number of calls to 0.
1180+
"""
11651181
self.n_calls = 0
11661182

11671183
def get_n_calls(self):
1184+
"""
1185+
get_n_calls(self)
1186+
1187+
Get number of calls.
1188+
1189+
Returns
1190+
-------
1191+
n_calls: int
1192+
number of distance computation calls
1193+
"""
11681194
return self.n_calls
11691195

11701196
def get_arrays(self):
1197+
"""
1198+
get_arrays(self)
1199+
1200+
Get data and node arrays.
1201+
1202+
Returns
1203+
-------
1204+
arrays: tuple of array
1205+
Arrays for storing tree data, index, node data and node bounds.
1206+
"""
11711207
return (self.data_arr, self.idx_array_arr,
11721208
self.node_data_arr, self.node_bounds_arr)
11731209

@@ -1362,7 +1398,8 @@ cdef class BinaryTree:
13621398
def query_radius(self, X, r, int return_distance=False,
13631399
int count_only=False, int sort_results=False):
13641400
"""
1365-
query_radius(self, X, r, count_only = False):
1401+
query_radius(X, r, return_distance=False,
1402+
count_only=False, sort_results=False)
13661403
13671404
query the tree for neighbors within a radius r
13681405
@@ -1694,7 +1731,10 @@ cdef class BinaryTree:
16941731
return np.exp(log_density_arr)
16951732

16961733
def two_point_correlation(self, X, r, dualtree=False):
1697-
"""Compute the two-point correlation function
1734+
"""
1735+
two_point_correlation(X, r, dualtree=False)
1736+
1737+
Compute the two-point correlation function
16981738
16991739
Parameters
17001740
----------

sklearn/neighbors/_dist_metrics.pyx

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,8 +110,10 @@ cdef class DistanceMetric:
110110
This class provides a uniform interface to fast distance metric
111111
functions. The various metrics can be accessed via the :meth:`get_metric`
112112
class method and the metric string identifier (see below).
113-
For example, to use the Euclidean distance:
114113
114+
Examples
115+
--------
116+
>>> from sklearn.neighbors import DistanceMetric
115117
>>> dist = DistanceMetric.get_metric('euclidean')
116118
>>> X = [[0, 1, 2],
117119
[3, 4, 5]]

sklearn/neighbors/_lof.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -176,8 +176,9 @@ def __init__(self, n_neighbors=20, algorithm='auto', leaf_size=30,
176176

177177
@property
178178
def fit_predict(self):
179-
""""Fits the model to the training set X and returns the labels.
179+
"""Fits the model to the training set X and returns the labels.
180180
181+
**Only available for novelty detection (when novelty is set to True).**
181182
Label is 1 for an inlier and -1 for an outlier according to the LOF
182183
score and the contamination parameter.
183184
@@ -207,7 +208,7 @@ def fit_predict(self):
207208
return self._fit_predict
208209

209210
def _fit_predict(self, X, y=None):
210-
""""Fits the model to the training set X and returns the labels.
211+
"""Fits the model to the training set X and returns the labels.
211212
212213
Label is 1 for an inlier and -1 for an outlier according to the LOF
213214
score and the contamination parameter.
@@ -286,9 +287,9 @@ def fit(self, X, y=None):
286287
def predict(self):
287288
"""Predict the labels (1 inlier, -1 outlier) of X according to LOF.
288289
290+
**Only available for novelty detection (when novelty is set to True).**
289291
This method allows to generalize prediction to *new observations* (not
290-
in the training set). Only available for novelty detection (when
291-
novelty is set to True).
292+
in the training set).
292293
293294
Parameters
294295
----------
@@ -345,8 +346,8 @@ def decision_function(self):
345346
346347
Bigger is better, i.e. large values correspond to inliers.
347348
349+
**Only available for novelty detection (when novelty is set to True).**
348350
The shift offset allows a zero threshold for being an outlier.
349-
Only available for novelty detection (when novelty is set to True).
350351
The argument X is supposed to contain *new data*: if X contains a
351352
point from training, it considers the later in its own neighborhood.
352353
Also, the samples in X are not considered in the neighborhood of any
@@ -381,8 +382,8 @@ def _decision_function(self, X):
381382
382383
Bigger is better, i.e. large values correspond to inliers.
383384
385+
**Only available for novelty detection (when novelty is set to True).**
384386
The shift offset allows a zero threshold for being an outlier.
385-
Only available for novelty detection (when novelty is set to True).
386387
The argument X is supposed to contain *new data*: if X contains a
387388
point from training, it considers the later in its own neighborhood.
388389
Also, the samples in X are not considered in the neighborhood of any
@@ -411,7 +412,7 @@ def score_samples(self):
411412
It is the opposite as bigger is better, i.e. large values correspond
412413
to inliers.
413414
414-
Only available for novelty detection (when novelty is set to True).
415+
**Only available for novelty detection (when novelty is set to True).**
415416
The argument X is supposed to contain *new data*: if X contains a
416417
point from training, it considers the later in its own neighborhood.
417418
Also, the samples in X are not considered in the neighborhood of any
@@ -447,7 +448,7 @@ def _score_samples(self, X):
447448
It is the opposite as bigger is better, i.e. large values correspond
448449
to inliers.
449450
450-
Only available for novelty detection (when novelty is set to True).
451+
**Only available for novelty detection (when novelty is set to True).**
451452
The argument X is supposed to contain *new data*: if X contains a
452453
point from training, it considers the later in its own neighborhood.
453454
Also, the samples in X are not considered in the neighborhood of any

0 commit comments

Comments
 (0)
0