@@ -167,21 +167,6 @@ def _tolerance(X, tol):
     return np.mean(variances) * tol
 
 
-def _check_normalize_sample_weight(sample_weight, X):
-    """Set sample_weight if None, and check for correct dtype"""
-
-    sample_weight_was_none = sample_weight is None
-
-    sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)
-    if not sample_weight_was_none:
-        # normalize the weights to sum up to n_samples
-        # an array of 1 (i.e. samples_weight is None) is already normalized
-        n_samples = len(sample_weight)
-        scale = n_samples / sample_weight.sum()
-        sample_weight = sample_weight * scale
-    return sample_weight
-
-
 @_deprecate_positional_args
 def k_means(X, n_clusters, *, sample_weight=None, init='k-means++',
             precompute_distances='deprecated', n_init=10, max_iter=300,
@@ -399,7 +384,7 @@ def _kmeans_single_elkan(X, sample_weight, n_clusters, max_iter=300,
         Number of iterations run.
     """
     random_state = check_random_state(random_state)
-    sample_weight = _check_normalize_sample_weight(sample_weight, X)
+    sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)
 
     # init
     centers = _init_centroids(X, n_clusters, init, random_state=random_state,
@@ -546,7 +531,7 @@ def _kmeans_single_lloyd(X, sample_weight, n_clusters, max_iter=300,
         Number of iterations run.
     """
     random_state = check_random_state(random_state)
-    sample_weight = _check_normalize_sample_weight(sample_weight, X)
+    sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)
 
     # init
     centers = _init_centroids(X, n_clusters, init, random_state=random_state,
@@ -639,7 +624,7 @@ def _labels_inertia(X, sample_weight, x_squared_norms, centers,
 
     n_threads = _openmp_effective_n_threads(n_threads)
 
-    sample_weight = _check_normalize_sample_weight(sample_weight, X)
+    sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)
     labels = np.full(n_samples, -1, dtype=np.int32)
     weight_in_clusters = np.zeros(n_clusters, dtype=centers.dtype)
     center_shift = np.zeros_like(weight_in_clusters)
@@ -1620,7 +1605,7 @@ def fit(self, X, y=None, sample_weight=None):
             raise ValueError("n_samples=%d should be >= n_clusters=%d"
                              % (n_samples, self.n_clusters))
 
-        sample_weight = _check_normalize_sample_weight(sample_weight, X)
+        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)
 
         n_init = self.n_init
         if hasattr(self.init, '__array__'):
@@ -1769,7 +1754,7 @@ def _labels_inertia_minibatch(self, X, sample_weight):
         """
         if self.verbose:
             print('Computing label assignment and total inertia')
-        sample_weight = _check_normalize_sample_weight(sample_weight, X)
+        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)
         x_squared_norms = row_norms(X, squared=True)
         slices = gen_batches(X.shape[0], self.batch_size)
         results = [_labels_inertia(X[s], sample_weight[s], x_squared_norms[s],
@@ -1807,7 +1792,7 @@ def partial_fit(self, X, y=None, sample_weight=None):
         if n_samples == 0:
             return self
 
-        sample_weight = _check_normalize_sample_weight(sample_weight, X)
+        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)
 
         x_squared_norms = row_norms(X, squared=True)
         self.random_state_ = getattr(self, "random_state_",
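For context on the behavioural change: the helper removed in the first hunk did more than validation. When sample_weight was passed explicitly, it rescaled the weights so they summed to n_samples before clustering, while the _check_sample_weight(sample_weight, X, dtype=X.dtype) call that replaces it only validates the array and leaves the values as given. A minimal NumPy sketch of the removed rescaling, for illustration only (the helper name below is hypothetical, not scikit-learn code):

    import numpy as np

    def rescale_like_removed_helper(sample_weight, n_samples):
        """Rescale weights to sum to n_samples, as the deleted helper did."""
        sample_weight = np.asarray(sample_weight, dtype=float)
        scale = n_samples / sample_weight.sum()
        return sample_weight * scale

    w = np.array([1.0, 1.0, 2.0, 4.0])  # user-supplied weights, n_samples = 4
    print(rescale_like_removed_helper(w, len(w)))  # [0.5 0.5 1.  2. ], sums to 4
    # After this diff, the weights reach the k-means routines unscaled,
    # i.e. as [1. 1. 2. 4.]; only their relative magnitudes matter to the
    # validation step.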