@@ -145,7 +145,8 @@ def check_paired_arrays(X, Y):
145
145
146
146
147
147
# Pairwise distances
148
- def euclidean_distances (X , Y = None , Y_norm_squared = None , squared = False ):
148
+ def euclidean_distances (X , Y = None , Y_norm_squared = None , squared = False ,
149
+ X_norm_squared = None ):
149
150
"""
150
151
Considering the rows of X (and Y=X) as vectors, compute the
151
152
distance matrix between each pair of vectors.
@@ -157,8 +158,8 @@ def euclidean_distances(X, Y=None, Y_norm_squared=None, squared=False):
157
158
158
159
This formulation has two advantages over other ways of computing distances.
159
160
First, it is computationally efficient when dealing with sparse data.
160
- Second, if x varies but y remains unchanged, then the right-most dot
161
- product `dot(y, y)` can be pre-computed.
161
+ Second, if one argument varies but the other remains unchanged, then
162
+ `dot(x, x)` and/or `dot(y, y)` can be pre-computed.
162
163
163
164
However, this is not the most precise way of doing this computation, and
164
165
the distance matrix returned by this function may not be exactly
@@ -179,6 +180,10 @@ def euclidean_distances(X, Y=None, Y_norm_squared=None, squared=False):
179
180
squared : boolean, optional
180
181
Return squared Euclidean distances.
181
182
183
+ X_norm_squared : array-like, shape = [n_samples_1], optional
184
+ Pre-computed dot-products of vectors in X (e.g.,
185
+ ``(X**2).sum(axis=1)``)
186
+
182
187
Returns
183
188
-------
184
189
distances : {array, sparse matrix}, shape (n_samples_1, n_samples_2)
@@ -200,24 +205,28 @@ def euclidean_distances(X, Y=None, Y_norm_squared=None, squared=False):
200
205
--------
201
206
paired_distances : distances between pairs of elements of X and Y.
202
207
"""
203
- # should not need X_norm_squared because if you could precompute that as
204
- # well as Y, then you should just pre-compute the output and not even
205
- # call this function.
206
208
X , Y = check_pairwise_arrays (X , Y )
207
209
208
- if Y_norm_squared is not None :
210
+ if X_norm_squared is not None :
211
+ XX = check_array (X_norm_squared )
212
+ if XX .shape == (1 , X .shape [0 ]):
213
+ XX = XX .T
214
+ elif XX .shape != (X .shape [0 ], 1 ):
215
+ raise ValueError (
216
+ "Incompatible dimensions for X and X_norm_squared" )
217
+ else :
218
+ XX = row_norms (X , squared = True )[:, np .newaxis ]
219
+
220
+ if X is Y : # shortcut in the common case euclidean_distances(X, X)
221
+ YY = XX .T
222
+ elif Y_norm_squared is not None :
209
223
YY = check_array (Y_norm_squared )
210
224
if YY .shape != (1 , Y .shape [0 ]):
211
225
raise ValueError (
212
226
"Incompatible dimensions for Y and Y_norm_squared" )
213
227
else :
214
228
YY = row_norms (Y , squared = True )[np .newaxis , :]
215
229
216
- if X is Y : # shortcut in the common case euclidean_distances(X, X)
217
- XX = YY .T
218
- else :
219
- XX = row_norms (X , squared = True )[:, np .newaxis ]
220
-
221
230
distances = safe_sparse_dot (X , Y .T , dense_output = True )
222
231
distances *= - 2
223
232
distances += XX
0 commit comments