DOC Fix documentation of default values in cluster module (#17455) · jayzed82/scikit-learn@7e0bebf · GitHub
[go: up one dir, main page]

Skip to content

Commit 7e0bebf

Browse files
pgithubsGitGreg123
authored and committed
DOC Fix documentation of default values in cluster module (scikit-learn#17455)
* updated _affinity_propagation.py docstrings in line with scikit-learn#15761
* updated cluster module docstrings in line with scikit-learn#15761
* updated cluster module docstrings post review scikit-learn#15761
* updated cluster module docstrings post review scikit-learn#15761
Co-authored-by: Greg Knowles <gitgreg123@users.noreply.github.com>
1 parent 3a28ee9 commit 7e0bebf

File tree

9 files changed

+158
-157
lines changed

9 files changed

+158
-157
lines changed

sklearn/cluster/_affinity_propagation.py

Lines changed: 19 additions & 19 deletions
8000
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,10 @@ def affinity_propagation(S, *, preference=None, convergence_iter=15,
4141
Parameters
4242
----------
4343
44-
S : array-like, shape (n_samples, n_samples)
44+
S : array-like of shape (n_samples, n_samples)
4545
Matrix of similarities between points
4646
47-
preference : array-like, shape (n_samples,) or float, optional
47+
preference : array-like of shape (n_samples,) or float, default=None
4848
Preferences for each point - points with larger values of
4949
preferences are more likely to be chosen as exemplars. The number of
5050
exemplars, i.e. of clusters, is influenced by the input preferences
@@ -53,27 +53,27 @@ def affinity_propagation(S, *, preference=None, convergence_iter=15,
5353
number of clusters). For a smaller amount of clusters, this can be set
5454
to the minimum value of the similarities.
5555
56-
convergence_iter : int, optional, default: 15
56+
convergence_iter : int, default=15
5757
Number of iterations with no change in the number
5858
of estimated clusters that stops the convergence.
5959
60-
max_iter : int, optional, default: 200
60+
max_iter : int, default=200
6161
Maximum number of iterations
6262
63-
damping : float, optional, default: 0.5
63+
damping : float, default=0.5
6464
Damping factor between 0.5 and 1.
6565
66-
copy : boolean, optional, default: True
66+
copy : bool, default=True
6767
If copy is False, the affinity matrix is modified inplace by the
6868
algorithm, for memory efficiency
6969
70-
verbose : boolean, optional, default: False
70+
verbose : bool, default=False
7171
The verbosity level
7272
73-
return_n_iter : bool, default False
73+
return_n_iter : bool, default=False
7474
Whether or not to return the number of iterations.
7575
76-
random_state : int or np.random.RandomStateInstance, default: 0
76+
random_state : int or RandomState instance, default=0
7777
Pseudo-random number generator to control the starting state.
7878
Use an int for reproducible results across function calls.
7979
See the :term:`Glossary <random_state>`.
@@ -84,10 +84,10 @@ def affinity_propagation(S, *, preference=None, convergence_iter=15,
8484
Returns
8585
-------
8686
87-
cluster_centers_indices : array, shape (n_clusters,)
87+
cluster_centers_indices : ndarray of shape (n_clusters,)
8888
index of clusters centers
8989
90-
labels : array, shape (n_samples,)
90+
labels : ndarray of shape (n_samples,)
9191
cluster labels for each point
9292
9393
n_iter : int
@@ -292,7 +292,7 @@ class AffinityPropagation(ClusterMixin, BaseEstimator):
292292
verbose : bool, default=False
293293
Whether to be verbose.
294294
295-
random_state : int or np.random.RandomStateInstance, default: 0
295+
random_state : int or RandomState instance, default=0
296296
Pseudo-random number generator to control the starting state.
297297
Use an int for reproducible results across function calls.
298298
See the :term:`Glossary <random_state>`.
@@ -382,8 +382,8 @@ def fit(self, X, y=None):
382382
383383
Parameters
384384
----------
385-
X : array-like or sparse matrix, shape (n_samples, n_features), or \
386-
array-like, shape (n_samples, n_samples)
385+
X : {array-like, sparse matrix} of shape (n_samples, n_features), or \
386+
array-like of shape (n_samples, n_samples)
387387
Training instances to cluster, or similarities / affinities between
388388
instances if ``affinity='precomputed'``. If a sparse feature matrix
389389
is provided, it will be converted into a sparse ``csr_matrix``.
@@ -428,13 +428,13 @@ def predict(self, X):
428428
429429
Parameters
430430
----------
431-
X : array-like or sparse matrix, shape (n_samples, n_features)
431+
X : {array-like, sparse matrix} of shape (n_samples, n_features)
432432
New data to predict. If a sparse matrix is provided, it will be
433433
converted into a sparse ``csr_matrix``.
434434
435435
Returns
436436
-------
437-
labels : ndarray, shape (n_samples,)
437+
labels : ndarray of shape (n_samples,)
438438
Cluster labels.
439439
"""
440440
check_is_fitted(self)
@@ -457,8 +457,8 @@ def fit_predict(self, X, y=None):
457457
458458
Parameters
459459
----------
460-
X : array-like or sparse matrix, shape (n_samples, n_features), or \
461-
array-like, shape (n_samples, n_samples)
460+
X : {array-like, sparse matrix} of shape (n_samples, n_features), or \
461+
array-like of shape (n_samples, n_samples)
462462
Training instances to cluster, or similarities / affinities between
463463
instances if ``affinity='precomputed'``. If a sparse feature matrix
464464
is provided, it will be converted into a sparse ``csr_matrix``.
@@ -468,7 +468,7 @@ def fit_predict(self, X, y=None):
468468
469469
Returns
470470
-------
471-
labels : ndarray, shape (n_samples,)
471+
labels : ndarray of shape (n_samples,)
472472
Cluster labels.
473473
"""
474474
return super().fit_predict(X, y)

sklearn/cluster/_agglomerative.py

Lines changed: 21 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -150,29 +150,29 @@ def ward_tree(X, *, connectivity=None, n_clusters=None, return_distance=False):
150150
151151
Parameters
152152
----------
153-
X : array, shape (n_samples, n_features)
153+
X : array-like of shape (n_samples, n_features)
154154
feature matrix representing n_samples samples to be clustered
155155
156-
connectivity : sparse matrix (optional).
156+
connectivity : sparse matrix, default=None
157157
connectivity matrix. Defines for each sample the neighboring samples
158158
following a given structure of the data. The matrix is assumed to
159159
be symmetric and only the upper triangular half is used.
160160
Default is None, i.e., the Ward algorithm is unstructured.
161161
162-
n_clusters : int (optional)
162+
n_clusters : int, default=None
163163
Stop early the construction of the tree at n_clusters. This is
164164
useful to decrease computation time if the number of clusters is
165165
not small compared to the number of samples. In this case, the
166166
complete tree is not computed, thus the 'children' output is of
167167
limited use, and the 'parents' output should rather be used.
168168
This option is valid only when specifying a connectivity matrix.
169169
170-
return_distance : bool (optional)
170+
return_distance : bool, default=False
171171
If True, return the distance between the clusters.
172172
173173
Returns
174174
-------
175-
children : 2D array, shape (n_nodes-1, 2)
175+
children : ndarray of shape (n_nodes-1, 2)
176176
The children of each non-leaf node. Values less than `n_samples`
177177
correspond to leaves of the tree which are the original samples.
178178
A node `i` greater than or equal to `n_samples` is a non-leaf
@@ -186,11 +186,11 @@ def ward_tree(X, *, connectivity=None, n_clusters=None, return_distance=False):
186186
n_leaves : int
187187
The number of leaves in the tree
188188
189-
parents : 1D array, shape (n_nodes, ) or None
189+
parents : ndarray of shape (n_nodes,) or None
190190
The parent of each node. Only returned when a connectivity matrix
191191
is specified, elsewhere 'None' is returned.
192192
193-
distances : 1D array, shape (n_nodes-1, )
193+
distances : ndarray of shape (n_nodes-1,)
194194
Only returned if return_distance is set to True (for compatibility).
195195
The distances between the centers of the nodes. `distances[i]`
196196
corresponds to a weighted euclidean distance between
@@ -356,24 +356,24 @@ def linkage_tree(X, connectivity=None, n_clusters=None, linkage='complete',
356356
357357
Parameters
358358
----------
359-
X : array, shape (n_samples, n_features)
359+
X : array-like of shape (n_samples, n_features)
360360
feature matrix representing n_samples samples to be clustered
361361
362-
connectivity : sparse matrix (optional).
362+
connectivity : sparse matrix, default=None
363363
connectivity matrix. Defines for each sample the neighboring samples
364364
following a given structure of the data. The matrix is assumed to
365365
be symmetric and only the upper triangular half is used.
366366
Default is None, i.e., the Ward algorithm is unstructured.
367367
368-
n_clusters : int (optional)
368+
n_clusters : int, default=None
369369
Stop early the construction of the tree at n_clusters. This is
370370
useful to decrease computation time if the number of clusters is
371371
not small compared to the number of samples. In this case, the
372372
complete tree is not computed, thus the 'children' output is of
373373
limited use, and the 'parents' output should rather be used.
374374
This option is valid only when specifying a connectivity matrix.
375375
376-
linkage : {"average", "complete", "single"}, optional, default: "complete"
376+
linkage : {"average", "complete", "single"}, default="complete"
377377
Which linkage criteria to use. The linkage criterion determines which
378378
distance to use between sets of observation.
379379
- average uses the average of the distances of each observation of
@@ -383,16 +383,16 @@ def linkage_tree(X, connectivity=None, n_clusters=None, linkage='complete',
383383
- single uses the minimum of the distances between all observations
384384
of the two sets.
385385
386-
affinity : string or callable, optional, default: "euclidean".
386+
affinity : str or callable, default="euclidean"
387387
which metric to use. Can be "euclidean", "manhattan", or any
388388
distance known to paired distance (see metrics.pairwise)
389389
390-
return_distance : bool, default False
390+
return_distance : bool, default=False
391391
whether or not to return the distances between the clusters.
392392
393393
Returns
394394
-------
395-
children : 2D array, shape (n_nodes-1, 2)
395+
children : ndarray of shape (n_nodes-1, 2)
396396
The children of each non-leaf node. Values less than `n_samples`
397397
correspond to leaves of the tree which are the original samples.
398398
A node `i` greater than or equal to `n_samples` is a non-leaf
@@ -406,11 +406,11 @@ def linkage_tree(X, connectivity=None, n_clusters=None, linkage='complete',
406406
n_leaves : int
407407
The number of leaves in the tree.
408408
409-
parents : 1D array, shape (n_nodes, ) or None
409+
parents : ndarray of shape (n_nodes,) or None
410410
The parent of each node. Only returned when a connectivity matrix
411411
is specified, elsewhere 'None' is returned.
412412
413-
distances : ndarray, shape (n_nodes-1,)
413+
distances : ndarray of shape (n_nodes-1,)
414414
Returned when return_distance is set to True.
415415
416416
distances[i] refers to the distance between children[i][0] and
@@ -636,7 +636,7 @@ def _hc_cut(n_clusters, children, n_leaves):
636636
n_clusters : int or ndarray
637637
The number of clusters to form.
638638
639-
children : 2D array, shape (n_nodes-1, 2)
639+
children : ndarray of shape (n_nodes-1, 2)
640640
The children of each non-leaf node. Values less than `n_samples`
641641
correspond to leaves of the tree which are the original samples.
642642
A node `i` greater than or equal to `n_samples` is a non-leaf
@@ -910,7 +910,8 @@ def fit_predict(self, X, y=None):
910910
911911
Parameters
912912
----------
913-
X : array-like, shape (n_samples, n_features) or (n_samples, n_samples)
913+
X : array-like of shape (n_samples, n_features) or \
914+
(n_samples, n_samples)
914915
Training instances to cluster, or distances between instances if
915916
``affinity='precomputed'``.
916917
@@ -919,7 +920,7 @@ def fit_predict(self, X, y=None):
919920
920921
Returns
921922
-------
922-
labels : ndarray, shape (n_samples,)
923+
labels : ndarray of shape (n_samples,)
923924
Cluster labels.
924925
"""
925926
return super().fit_predict(X, y)
@@ -957,7 +958,7 @@ class FeatureAgglomeration(AgglomerativeClustering, AgglomerationTransform):
957958
kneighbors_graph. Default is None, i.e., the
958959
hierarchical clustering algorithm is unstructured.
959960
960-
compute_full_tree : 'auto' or bool, optional, default='auto'
961+
compute_full_tree : 'auto' or bool, default='auto'
961962
Stop early the construction of the tree at n_clusters. This is useful
962963
to decrease computation time if the number of clusters is not small
963964
compared to the number of features. This option is useful only when

sklearn/cluster/_bicluster.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ def fit(self, X, y=None):
111111
112112
Parameters
113113
----------
114-
X : array-like, shape (n_samples, n_features)
114+
X : array-like of shape (n_samples, n_features)
115115
116116
y : Ignored
117117

sklearn/cluster/_dbscan.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def dbscan(X, eps=0.5, *, min_samples=5, metric='minkowski',
4646
The number of samples (or total weight) in a neighborhood for a point
4747
to be considered as a core point. This includes the point itself.
4848
49-
metric : string, or callable
49+
metric : str or callable, default='minkowski'
5050
The metric to use when calculating distance between instances in a
5151
feature array. If metric is a string or callable, it must be one of
5252
the options allowed by :func:`sklearn.metrics.pairwise_distances` for

sklearn/cluster/_feature_agglomeration.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ def transform(self, X):
3333
3434
Returns
3535
-------
36-
Y : array, shape = [n_samples, n_clusters] or [n_clusters]
36+
Y : ndarray of shape (n_samples, n_clusters) or (n_clusters,)
3737
The pooled values for each feature cluster.
3838
"""
3939
check_is_fitted(self)
@@ -67,7 +67,7 @@ def inverse_transform(self, Xred):
6767
6868
Returns
6969
-------
70-
X : array, shape=[n_samples, n_features] or [n_features]
70+
X : ndarray of shape (n_samples, n_features) or (n_features,)
7171
A vector of size n_samples with the values of Xred assigned to
7272
each of the clusters of samples.
7373
"""

0 commit comments

Comments
 (0)
0