Merge pull request #7034 from jakirkham/bench_matmul · numpy/numpy@d65d871 · GitHub
[go: up one dir, main page]

Skip to content

Commit d65d871

Browse files
committed
Merge pull request #7034 from jakirkham/bench_matmul
BENCH, DOC: Benchmark matmul and update documentation
2 parents 0bacdf6 + 1504975 commit d65d871

File tree

2 files changed

+31
-12
lines changed

2 files changed

+31
-12
lines changed

benchmarks/benchmarks/bench_linalg.py

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,12 @@ def setup(self):
1818
self.a3 = np.arange(480000.).reshape(60, 80, 100)
1919
self.b3 = np.arange(192000.).reshape(80, 60, 40)
2020

21-
def time_einsum_ij_jk_a_b(self):
22-
np.einsum('ij,jk', self.a, self.b)
23-
2421
def time_dot_a_b(self):
2522
np.dot(self.a, self.b)
2623

24+
def time_dot_d_dot_b_c(self):
25+
np.dot(self.d, np.dot(self.b, self.c))
26+
2727
def time_dot_trans_a_at(self):
2828
np.dot(self.a, self.at)
2929

@@ -36,20 +36,38 @@ def time_dot_trans_at_a(self):
3636
def time_dot_trans_atc_a(self):
3737
np.dot(self.atc, self.a)
3838

39+
def time_einsum_i_ij_j(self):
40+
np.einsum('i,ij,j', self.d, self.b, self.c)
41+
42+
def time_einsum_ij_jk_a_b(self):
43+
np.einsum('ij,jk', self.a, self.b)
44+
45+
def time_einsum_ijk_jil_kl(self):
46+
np.einsum('ijk,jil->kl', self.a3, self.b3)
47+
3948
def time_inner_trans_a_a(self):
4049
np.inner(self.a, self.a)
4150

4251
def time_inner_trans_a_ac(self):
4352
np.inner(self.a, self.ac)
4453

45-
def time_einsum_i_ij_j(self):
46-
np.einsum('i,ij,j', self.d, self.b, self.c)
54+
def time_matmul_a_b(self):
55+
np.matmul(self.a, self.b)
4756

48-
def time_dot_d_dot_b_c(self):
49-
np.dot(self.d, np.dot(self.b, self.c))
57+
def time_matmul_d_matmul_b_c(self):
58+
np.matmul(self.d, np.matmul(self.b, self.c))
5059

51-
def time_einsum_ijk_jil_kl(self):
52-
np.einsum('ijk,jil->kl', self.a3, self.b3)
60+
def time_matmul_trans_a_at(self):
61+
np.matmul(self.a, self.at)
62+
63+
def time_matmul_trans_a_atc(self):
64+
np.matmul(self.a, self.atc)
65+
66+
def time_matmul_trans_at_a(self):
67+
np.matmul(self.at, self.a)
68+
69+
def time_matmul_trans_atc_a(self):
70+
np.matmul(self.atc, self.a)
5371

5472
def time_tensordot_a_b_axes_1_0_0_1(self):
5573
np.tensordot(self.a3, self.b3, axes=([1, 0], [0, 1]))

doc/release/1.11.0-notes.rst

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -149,11 +149,12 @@ useless computations when printing a masked array.
149149
The function now uses the fallocate system call to reserve sufficient
150150
diskspace on filesystems that support it.
151151

152-
``np.dot`` optimized for operations of the form ``A.T @ A`` and ``A @ A.T``
153-
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
152+
Optimizations for operations of the form ``A.T @ A`` and ``A @ A.T``
153+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
154154
Previously, ``gemm`` BLAS operations were used for all matrix products. Now,
155155
if the matrix product is between a matrix and its transpose, it will use
156-
``syrk`` BLAS operations for a performance boost.
156+
``syrk`` BLAS operations for a performance boost. This optimization has been
157+
extended to ``@``, ``numpy.dot``, ``numpy.inner``, and ``numpy.matmul``.
157158

158159
**Note:** Requires the transposed and non-transposed matrices to share data.
159160

0 commit comments

Comments (0)