@@ -133,7 +133,8 @@ def check_paired_arrays(X, Y):
133
133
134
134
135
135
# Pairwise distances
136
- def euclidean_distances (X , Y = None , Y_norm_squared = None , squared = False ):
136
+ def euclidean_distances (X , Y = None , Y_norm_squared = None , squared = False ,
137
+ X_norm_squared = None ):
137
138
"""
138
139
Considering the rows of X (and Y=X) as vectors, compute the
139
140
distance matrix between each pair of vectors.
@@ -145,8 +146,8 @@ def euclidean_distances(X, Y=None, Y_norm_squared=None, squared=False):
145
146
146
147
This formulation has two advantages over other ways of computing distances.
147
148
First, it is computationally efficient when dealing with sparse data.
148
- Second, if x varies but y remains unchanged, then the right-most dot
149
- product `dot(y, y)` can be pre-computed.
149
+ Second, if one argument varies but the other remains unchanged, then
150
+ `dot(x, x)` and/or `dot(y, y)` can be pre-computed.
150
151
151
152
However, this is not the most precise way of doing this computation, and
152
153
the distance matrix returned by this function may not be exactly
@@ -167,6 +168,10 @@ def euclidean_distances(X, Y=None, Y_norm_squared=None, squared=False):
167
168
squared : boolean, optional
168
169
Return squared Euclidean distances.
169
170
171
+ X_norm_squared : array-like, shape = [n_samples_1], optional
172
+ Pre-computed dot-products of vectors in X (e.g.,
173
+ ``(X**2).sum(axis=1)``)
174
+
170
175
Returns
171
176
-------
172
177
distances : {array, sparse matrix}, shape (n_samples_1, n_samples_2)
@@ -188,24 +193,28 @@ def euclidean_distances(X, Y=None, Y_norm_squared=None, squared=False):
188
193
--------
189
194
paired_distances : distances between pairs of elements of X and Y.
190
195
"""
191
- # should not need X_norm_squared because if you could precompute that as
192
- # well as Y, then you should just pre-compute the output and not even
193
- # call this function.
194
196
X , Y = check_pairwise_arrays (X , Y )
195
197
196
- if Y_norm_squared is not None :
198
+ if X_norm_squared is not None :
199
+ XX = check_array (X_norm_squared )
200
+ if XX .shape == (1 , X .shape [0 ]):
201
+ XX = XX .T
202
+ elif XX .shape != (X .shape [0 ], 1 ):
203
+ raise ValueError (
204
+ "Incompatible dimensions for X and X_norm_squared" )
205
+ else :
206
+ XX = row_norms (X , squared = True )[:, np .newaxis ]
207
+
208
+ if X is Y : # shortcut in the common case euclidean_distances(X, X)
209
+ YY = XX .T
210
+ elif Y_norm_squared is not None :
197
211
YY = check_array (Y_norm_squared )
198
212
if YY .shape != (1 , Y .shape [0 ]):
199
213
raise ValueError (
200
214
"Incompatible dimensions for Y and Y_norm_squared" )
201
215
else :
202
216
YY = row_norms (Y , squared = True )[np .newaxis , :]
203
217
204
- if X is Y : # shortcut in the common case euclidean_distances(X, X)
205
- XX = YY .T
206
- else :
207
- XX = row_norms (X , squared = True )[:, np .newaxis ]
208
-
209
218
distances = safe_sparse_dot (X , Y .T , dense_output = True )
210
219
distances *= - 2
211
220
distances += XX
0 commit comments