@@ -170,98 +170,46 @@ def __numpy_ufunc__(self, ufunc, method, pos, inputs, **kwargs):
170
170
assert_equal (c .dot (a ), "A" )
171
171
assert_raises (TypeError , np .dot , b , c )
172
172
assert_raises (TypeError , c .dot , b )
173
-
174
-
173
+
174
+
175
175
def test_npdot_segfault():
    """Regression test for the float32 ``np.dot`` segfault.

    See https://github.com/numpy/numpy/issues/4007

    The test only does work when ``sys.platform == 'darwin'``; on any other
    platform it returns immediately without checking anything.

    It first performs the matrix-vector product that crashes when the sgemv
    alignment bug is present, then compares float32 ``np.dot`` results on
    specially aligned buffers against a float64 reference for several
    shapes, memory orders and transposition states.
    """
    if sys.platform != 'darwin':
        return

    def aligned_array(shape, align, dtype, order='C'):
        """Return a zero-filled array whose data address is a multiple of `align`.

        Parameters
        ----------
        shape : int or tuple of ints
            Shape of the returned array.
        align : int
            Required alignment of the data pointer, in bytes.
        dtype : numpy scalar type
            Element type; called with no arguments to obtain the item size.
        order : {'C', 'F'}, optional
            Memory order passed to ``reshape``.

        Returns
        -------
        ndarray
            A view of shape `shape` into a larger ``uint8`` buffer.
        """
        itemsize = dtype().nbytes
        nbytes = int(np.prod(shape)) * itemsize
        # Over-allocate by `align` bytes so that an aligned start address
        # is guaranteed to exist inside the buffer.
        raw = np.zeros(nbytes + align, dtype=np.uint8)
        address = raw.__array_interface__["data"][0]
        # Smallest non-negative offset giving the desired alignment.
        offset = (-address) % align
        view = raw[offset:offset + nbytes].view(dtype=dtype)
        return view.reshape(shape, order=order)

    vec = aligned_array(100, 15, np.float32)
    mat = aligned_array((100, 100), 15, np.float32)
    # This always segfaults when the sgemv alignment bug is present
    np.dot(mat, vec)

    # Test the sanity of np.dot after applying patch.
    # (rows, cols, trans): when `trans` is true the matrix is stored with
    # shape (cols, rows) and transposed before the product.
    cases = (
        (10000, 200, False),
        (10000, 200, True),
        (10000, 89, False),
        (10000, 89, True),
    )
    # NOTE(review): 15 is presumably meant to yield a buffer that is
    # misaligned for float32 and 32 an aligned one, exercising different
    # BLAS code paths -- confirm against the linked issue.
    for align in (15, 32):
        for rows, cols, trans in cases:
            x_ref = np.random.rand(cols)
            a_shape = (cols, rows) if trans else (rows, cols)
            a_ref = np.random.rand(*a_shape)

            x_f32 = aligned_array(cols, align, np.float32)
            x_f32[:] = x_ref
            a_c = aligned_array(a_shape, align, np.float32)
            a_f = aligned_array(a_shape, align, np.float32, order='F')
            a_c[:, :] = a_ref
            a_f[:, :] = a_ref

            if trans:
                a_ref, a_c, a_f = a_ref.T, a_c.T, a_f.T

            # float64 reference result, rounded to float32 for comparison
            desired = np.dot(a_ref, x_ref).astype(np.float32)
            assert_allclose(np.dot(a_c, x_f32), desired,
                            rtol=1e-5, atol=1e-5)
            assert_allclose(np.dot(a_f, x_f32), desired,
                            rtol=1e-5, atol=1e-5)
0 commit comments