23
23
from ..utils import check_array
24
24
from ..utils ._fast_dict import IntFloatDict
25
25
from ..utils .graph import _fix_connected_components
26
- from ..utils ._param_validation import Interval , StrOptions
26
+ from ..utils ._param_validation import Hidden , Interval , StrOptions
27
27
from ..utils .validation import check_memory
28
28
29
29
# mypy error: Module 'sklearn.cluster' has no attribute '_hierarchical_fast'
@@ -760,6 +760,19 @@ class AgglomerativeClustering(ClusterMixin, BaseEstimator):
760
760
If "precomputed", a distance matrix (instead of a similarity matrix)
761
761
is needed as input for the fit method.
762
762
763
+ .. deprecated:: 1.2
764
+ `affinity` was deprecated in version 1.2 and will be renamed to
765
+ `metric` in 1.4.
766
+
767
+ metric : str or callable, default=None
768
+ Metric used to compute the linkage. Can be "euclidean", "l1", "l2",
769
+ "manhattan", "cosine", or "precomputed". If set to `None` then
770
+ "euclidean" is used. If linkage is "ward", only "euclidean" is
771
+ accepted. If "precomputed", a distance matrix is needed as input for
772
+ the fit method.
773
+
774
+ .. versionadded:: 1.2
775
+
763
776
memory : str or object with the joblib.Memory interface, default=None
764
777
Used to cache the output of the computation of the tree.
765
778
By default, no caching is done. If a string is given, it is the
@@ -880,9 +893,15 @@ class AgglomerativeClustering(ClusterMixin, BaseEstimator):
880
893
_parameter_constraints = {
881
894
"n_clusters" : [Interval (Integral , 1 , None , closed = "left" ), None ],
882
895
"affinity" : [
896
+ Hidden (StrOptions ({"deprecated" })),
883
897
StrOptions (set (_VALID_METRICS ) | {"precomputed" }),
884
898
callable ,
885
899
],
900
+ "metric" : [
901
+ StrOptions (set (_VALID_METRICS ) | {"precomputed" }),
902
+ callable ,
903
+ None ,
904
+ ],
886
905
"memory" : "no_validation" , # TODO
887
906
"connectivity" : ["array-like" , callable , None ],
888
907
"compute_full_tree" : [StrOptions ({"auto" }), "boolean" ],
@@ -895,7 +914,8 @@ def __init__(
895
914
self ,
896
915
n_clusters = 2 ,
897
916
* ,
898
- affinity = "euclidean" ,
917
+ affinity = "deprecated" , # TODO(1.4): Remove
918
+ metric = None , # TODO(1.4): Set to "euclidean"
899
919
memory = None ,
900
920
connectivity = None ,
901
921
compute_full_tree = "auto" ,
@@ -910,6 +930,7 @@ def __init__(
910
930
self .compute_full_tree = compute_full_tree
911
931
self .linkage = linkage
912
932
self .affinity = affinity
933
+ self .metric = metric
913
934
self .compute_distances = compute_distances
914
935
915
936
def fit (self , X , y = None ):
@@ -920,7 +941,7 @@ def fit(self, X, y=None):
920
941
X : array-like, shape (n_samples, n_features) or \
921
942
(n_samples, n_samples)
922
943
Training instances to cluster, or distances between instances if
923
- ``affinity ='precomputed'``.
944
+ ``metric ='precomputed'``.
924
945
925
946
y : Ignored
926
947
Not used, present here for API consistency by convention.
@@ -950,6 +971,24 @@ def _fit(self, X):
950
971
"""
951
972
memory = check_memory (self .memory )
952
973
974
+ self ._metric = self .metric
975
+ # TODO(1.4): Remove
976
+ if self .affinity != "deprecated" :
977
+ if self .metric is not None :
978
+ raise ValueError (
979
+ "Both `affinity` and `metric` attributes were set. Attribute"
980
+ " `affinity` was deprecated in version 1.2 and will be removed in"
981
+ " 1.4. To avoid this error, only set the `metric` attribute."
982
+ )
983
+ warnings .warn (
984
+ "Attribute `affinity` was deprecated in version 1.2 and will be removed"
985
+ " in 1.4. Use `metric` instead" ,
986
+ FutureWarning ,
987
+ )
988
+ self ._metric = self .affinity
989
+ elif self .metric is None :
990
+ self ._metric = "euclidean"
991
+
953
992
if not ((self .n_clusters is None ) ^ (self .distance_threshold is None )):
954
993
raise ValueError (
955
994
"Exactly one of n_clusters and "
@@ -962,10 +1001,10 @@ def _fit(self, X):
962
1001
"compute_full_tree must be True if distance_threshold is set."
963
1002
)
964
1003
965
- if self .linkage == "ward" and self .affinity != "euclidean" :
1004
+ if self .linkage == "ward" and self ._metric != "euclidean" :
966
1005
raise ValueError (
967
- "%s was provided as affinity . Ward can only "
968
- "work with euclidean distances." % ( self . affinity ,)
1006
+ f" { self . _metric } was provided as metric . Ward can only "
1007
+ "work with euclidean distances."
969
1008
)
970
1009
971
1010
tree_builder = _TREE_BUILDERS [self .linkage ]
@@ -998,7 +1037,7 @@ def _fit(self, X):
998
1037
kwargs = {}
999
1038
if self .linkage != "ward" :
1000
1039
kwargs ["linkage" ] = self .linkage
1001
- kwargs ["affinity" ] = self .affinity
1040
+ kwargs ["affinity" ] = self ._metric
1002
1041
1003
1042
distance_threshold = self .distance_threshold
1004
1043
@@ -1084,6 +1123,19 @@ class FeatureAgglomeration(
1084
1123
If "precomputed", a distance matrix (instead of a similarity matrix)
1085
1124
is needed as input for the fit method.
1086
1125
1126
+ .. deprecated:: 1.2
1127
+ `affinity` was deprecated in version 1.2 and will be renamed to
1128
+ `metric` in 1.4.
1129
+
1130
+ metric : str or callable, default=None
1131
+ Metric used to compute the linkage. Can be "euclidean", "l1", "l2",
1132
+ "manhattan", "cosine", or "precomputed". If set to `None` then
1133
+ "euclidean" is used. If linkage is "ward", only "euclidean" is
1134
+ accepted. If "precomputed", a distance matrix is needed as input for
1135
+ the fit method.
1136
+
1137
+ .. versionadded:: 1.2
1138
+
1087
1139
memory : str or object with the joblib.Memory interface, default=None
1088
1140
Used to cache the output of the computation of the tree.
1089
1141
By default, no caching is done. If a string is given, it is the
@@ -1208,8 +1260,14 @@ class FeatureAgglomeration(
1208
1260
_parameter_constraints = {
1209
1261
"n_clusters" : [Interval (Integral , 1 , None , closed = "left" ), None ],
1210
1262
"affinity" : [
1263
+ Hidden (StrOptions ({"deprecated" })),
1264
+ StrOptions (set (_VALID_METRICS ) | {"precomputed" }),
1265
+ callable ,
1266
+ ],
1267
+ "metric" : [
1211
1268
StrOptions (set (_VALID_METRICS ) | {"precomputed" }),
1212
1269
callable ,
1270
+ None ,
1213
1271
],
1214
1272
"memory" : "no_validation" , # TODO
1215
1273
"connectivity" : ["array-like" , callable , None ],
@@ -1224,7 +1282,8 @@ def __init__(
1224
1282
self ,
1225
1283
n_clusters = 2 ,
1226
1284
* ,
1227
- affinity = "euclidean" ,
1285
+ affinity = "deprecated" , # TODO(1.4): Remove
1286
+ metric = None , # TODO(1.4): Set to "euclidean"
1228
1287
memory = None ,
1229
1288
connectivity = None ,
1230
1289
compute_full_tree = "auto" ,
@@ -1240,6 +1299,7 @@ def __init__(
1240
1299
compute_full_tree = compute_full_tree ,
1241
1300
linkage = linkage ,
1242
1301
affinity = affinity ,
1302
+ metric = metric ,
1243
1303
distance_threshold = distance_threshold ,
1244
1304
compute_distances = compute_distances ,
1245
1305
)
0 commit comments