add another mlir · srcarroll/mmperf@914cbb6 · GitHub
[go: up one dir, main page]

Skip to content

Commit 914cbb6

Browse files
committed
add another mlir
1 parent 3a107a9 commit 914cbb6

File tree

1 file changed

+217
-0
lines changed

1 file changed

+217
-0
lines changed

asm/mlir_matmul_480x512x16.asm

Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
b/matmul/matmul_480x512x16:
2+
(__TEXT,__text) section
3+
; ---------------------------------------------------------------------------
; _rtclock
; Purpose: call _gettimeofday and return the result in d0 as a double:
;            d0 = (double)(64-bit value at sp+0x8)
;               + (double)(32-bit value at sp+0x10) * (double loaded from 0x100003f60)
; In:      none (no argument register is read before being overwritten).
; Out:     d0.
; Frame:   0x30 bytes. [sp] = printf argument slot, sp+0x8 = first gettimeofday
;          buffer, sp+0x18 = second gettimeofday buffer, sp+0x20 = saved x29/x30.
; NOTE(review): the two fields read back are presumably tv_sec / tv_usec of
;          struct timeval, and the constant at 0x100003f60 is presumably 1.0e-6;
;          its value is not visible in this listing — confirm.
; ---------------------------------------------------------------------------
_rtclock:
4+
0000000100003b68 sub sp, sp, #0x30 ; reserve the 0x30-byte frame
5+
0000000100003b6c stp x29, x30, [sp, #0x20] ; Latency: 6 ; save frame pointer and link register
6+
0000000100003b70 add x29, sp, #0x20 ; x29 -> saved x29/x30 pair
7+
0000000100003b74 add x0, sp, #0x8 ; arg 0: buffer at sp+0x8
8+
0000000100003b78 sub x1, x29, #0x8 ; arg 1: buffer at x29-0x8 (= sp+0x18)
9+
0000000100003b7c bl 0x100003edc ; symbol stub for: _gettimeofday
10+
0000000100003b80 cbz w0, 0x100003b94 ; return value 0: skip the error report
11+
0000000100003b84 str x0, [sp] ; Latency: 4 ; non-zero return code goes to [sp] as the %d argument
12+
0000000100003b88 adr x0, #0x3e0 ; literal pool for: "Error return from gettimeofday: %d"
13+
0000000100003b8c nop
14+
0000000100003b90 bl 0x100003ee8 ; symbol stub for: _printf
15+
0000000100003b94 ldr d0, [sp, #0x8] ; Latency: 4 ; raw 64-bit integer from sp+0x8
16+
0000000100003b98 scvtf d0, d0 ; Latency: 2 ; signed 64-bit integer -> double
17+
0000000100003b9c ldr s1, [sp, #0x10] ; Latency: 4 ; raw 32-bit integer from sp+0x10
18+
0000000100003ba0 sshll.2d v1, v1, #0x0 ; Latency: 2 ; sign-extend the 32-bit lane to 64 bits
19+
0000000100003ba4 scvtf d1, d1 ; Latency: 2 ; signed 64-bit integer -> double
20+
0000000100003ba8 nop
21+
0000000100003bac ldr d2, 0x100003f60 ; Latency: 4 ; scale factor (value not shown in this listing)
22+
0000000100003bb0 fmul d1, d1, d2 ; Latency: 5 ; scale the 32-bit field
23+
0000000100003bb4 fadd d0, d1, d0 ; Latency: 5 ; d0 = scaled field + 64-bit field (return value)
24+
0000000100003bb8 ldp x29, x30, [sp, #0x20] ; Latency: 4 ; restore frame pointer and link register
25+
0000000100003bbc add sp, sp, #0x30 ; release the frame
26+
0000000100003bc0 ret
27+
; ---------------------------------------------------------------------------
; _init_matrix
; Purpose: fill a buffer of 32-bit floats, each element = (float)rand() * 2^-31.
; In:      x0 = destination pointer
;          w1 = inner count (floats written per outer pass)
;          w2 = outer count (number of passes)
; Out:     no return value is set up explicitly before ret.
; Notes:   returns without writing anything if w1 < 1 or w2 < 1 (signed compare).
;          Successive outer passes start 4*w1 bytes apart.
; Register roles (all callee-saved, spilled in the prologue):
;          x19 = start address of the current pass   x20 = outer index (counts up)
;          x21 = w1 zero-extended                     x22 = w2 zero-extended
;          x23 = 4*w1 (byte stride between passes)    w24 = 0x30000000 (float 2^-31)
;          x25 = inner countdown                      x26 = store cursor
; ---------------------------------------------------------------------------
_init_matrix:
28+
0000000100003bc4 stp x26, x25, [sp, #-0x50]! ; Latency: 6 ; allocate 0x50-byte frame and spill x25/x26
29+
0000000100003bc8 stp x24, x23, [sp, #0x10] ; Latency: 6
30+
0000000100003bcc stp x22, x21, [sp, #0x20] ; Latency: 6
31+
0000000100003bd0 stp x20, x19, [sp, #0x30] ; Latency: 6
32+
0000000100003bd4 stp x29, x30, [sp, #0x40] ; Latency: 6 ; save frame pointer and link register
33+
0000000100003bd8 add x29, sp, #0x40 ; x29 -> saved x29/x30 pair
34+
0000000100003bdc cmp w2, #0x1
35+
0000000100003be0 b.lt 0x100003c38 ; outer count < 1: nothing to do, go to epilogue
36+
0000000100003be4 cmp w1, #0x1
37+
0000000100003be8 b.lt 0x100003c38 ; inner count < 1: nothing to do, go to epilogue
38+
0000000100003bec mov x19, x0 ; Latency: 2 ; x19 = destination base (must survive the _rand calls)
39+
0000000100003bf0 mov x20, #0x0 ; outer index = 0
40+
0000000100003bf4 mov w21, w1 ; Latency: 2 ; x21 = inner count, upper 32 bits cleared
41+
0000000100003bf8 mov w22, w2 ; Latency: 2 ; x22 = outer count, upper 32 bits cleared
42+
0000000100003bfc lsl x23, x21, #2 ; x23 = inner count * 4 bytes
43+
0000000100003c00 mov w24, #0x30000000 ; bit pattern of float 2^-31 (sign 0, exponent 96, mantissa 0)
44+
0000000100003c04 mov x25, x21 ; Latency: 2 ; outer-loop head: reset inner countdown
45+
0000000100003c08 mov x26, x19 ; Latency: 2 ; store cursor = start of this pass
46+
0000000100003c0c bl 0x100003ef4 ; symbol stub for: _rand
47+
0000000100003c10 scvtf s0, w0 ; Latency: 10 ; inner-loop body: (float)rand()
48+
0000000100003c14 fmov s1, w24 ; Latency: 5 ; reload scale; s1 is not preserved across the call
49+
0000000100003c18 fmul s0, s0, s1 ; Latency: 4 ; scale by 2^-31
50+
0000000100003c1c str s0, [x26], #0x4 ; Latency: 4 ; store element, then advance cursor by 4 bytes
51+
0000000100003c20 subs x25, x25, #0x1
52+
0000000100003c24 b.ne 0x100003c0c ; repeat until the inner countdown reaches 0
53+
0000000100003c28 add x20, x20, #0x1 ; next outer pass
54+
0000000100003c2c add x19, x19, x23 ; Latency: 2 ; advance pass start by 4*w1 bytes
55+
0000000100003c30 cmp x20, x22 ; Latency: 2
56+
0000000100003c34 b.ne 0x100003c04 ; repeat until the outer index equals the outer count
57+
0000000100003c38 ldp x29, x30, [sp, #0x40] ; Latency: 4 ; epilogue: restore in reverse order of the spills
58+
0000000100003c3c ldp x20, x19, [sp, #0x30] ; Latency: 4
59+
0000000100003c40 ldp x22, x21, [sp, #0x20] ; Latency: 4
60+
0000000100003c44 ldp x24, x23, [sp, #0x10] ; Latency: 4
61+
0000000100003c48 ldp x26, x25, [sp], #0x50 ; Latency: 4 ; restore x25/x26 and release the frame
62+
0000000100003c4c ret
63+
; ---------------------------------------------------------------------------
; _main
; Purpose: timing harness. Sequence visible in this listing:
;   1. _gettimeofday -> start time (kept in x19 / d9, combined at 0x100003da0).
;   2. Three phases of _rand calls whose results are discarded:
;        phase 1: 16 back-to-back loops of 480 (0x1e0) calls
;        phase 2: 512 (0x200) iterations of 16 unrolled calls
;        phase 3: 512 (0x200) outer x 480 (0x1e0) inner calls
;   3. _gettimeofday -> end time (d8).
;   4. _fopen("FILE_NAME", "w"), then _fprintf of
;        786432000.0 / (end - start) / 1.0e9   with format "%0.2lf GFLOPS\n",
;      then _fclose; returns 0 in w0.
; Frame:   0x50 bytes. [sp] = variadic argument slot, sp+0x8 / sp+0x18 =
;          gettimeofday buffers, sp+0x20 = d9/d8, sp+0x30 = x20/x19,
;          sp+0x40 = x29/x30.
; Register roles: x19 = start seconds, then loop counter, then FILE*;
;          x20 = _rand countdown; d8 = scale constant, later end time;
;          d9 = start time.
; NOTE(review): no call other than _rand appears between the two
;          _gettimeofday calls, so no matmul call is visible in the timed
;          interval of this listing — presumably optimized away or inlined to
;          nothing; confirm against the source before trusting the GFLOPS value.
; NOTE(review): the _fopen result is handed to _fprintf / _fclose without a
;          null check.
; NOTE(review): the constant at 0x100003f60 is presumably 1.0e-6 (microseconds
;          to seconds); its value is not visible here — confirm.
; ---------------------------------------------------------------------------
_main:
64+
0000000100003c50 sub sp, sp, #0x50 ; reserve the 0x50-byte frame
65+
0000000100003c54 stp d9, d8, [sp, #0x20] ; Latency: 6 ; spill callee-saved d8/d9
66+
0000000100003c58 stp x20, x19, [sp, #0x30] ; Latency: 6 ; spill callee-saved x19/x20
67+
0000000100003c5c stp x29, x30, [sp, #0x40] ; Latency: 6 ; save frame pointer and link register
68+
0000000100003c60 add x29, sp, #0x40 ; x29 -> saved x29/x30 pair
69+
0000000100003c64 add x0, sp, #0x8 ; arg 0: buffer at sp+0x8
70+
0000000100003c68 add x1, sp, #0x18 ; arg 1: buffer at sp+0x18
71+
0000000100003c6c bl 0x100003edc ; symbol stub for: _gettimeofday
72+
0000000100003c70 cbz w0, 0x100003c84 ; return value 0: skip the error report
73+
0000000100003c74 str x0, [sp] ; Latency: 4 ; non-zero return code goes to [sp] as the %d argument
74+
0000000100003c78 adr x0, #0x2f0 ; literal pool for: "Error return from gettimeofday: %d"
75+
0000000100003c7c nop
76+
0000000100003c80 bl 0x100003ee8 ; symbol stub for: _printf
77+
0000000100003c84 ldr x19, [sp, #0x8] ; Latency: 4 ; start: 64-bit field, kept as an integer in x19 for now
78+
0000000100003c88 ldr s0, [sp, #0x10] ; Latency: 4 ; start: 32-bit field
79+
0000000100003c8c sshll.2d v0, v0, #0x0 ; Latency: 2 ; sign-extend the 32-bit lane to 64 bits
80+
0000000100003c90 scvtf d0, d0 ; Latency: 2 ; signed 64-bit integer -> double
81+
0000000100003c94 nop
82+
0000000100003c98 ldr d8, 0x100003f60 ; Latency: 4 ; d8 = scale factor (reused for the end time below)
83+
0000000100003c9c fmul d9, d0, d8 ; Latency: 5 ; d9 = scaled 32-bit field of the start time
84+
0000000100003ca0 mov w20, #0x1e0 ; phase 1, loop 1 of 16: 480 _rand calls
85+
0000000100003ca4 bl 0x100003ef4 ; symbol stub for: _rand
86+
0000000100003ca8 subs x20, x20, #0x1
87+
0000000100003cac b.ne 0x100003ca4
88+
0000000100003cb0 mov w20, #0x1e0 ; phase 1, loop 2 of 16
89+
0000000100003cb4 bl 0x100003ef4 ; symbol stub for: _rand
90+
0000000100003cb8 subs x20, x20, #0x1
91+
0000000100003cbc b.ne 0x100003cb4
92+
0000000100003cc0 mov w20, #0x1e0 ; phase 1, loop 3 of 16
93+
0000000100003cc4 bl 0x100003ef4 ; symbol stub for: _rand
94+
0000000100003cc8 subs x20, x20, #0x1
95+
0000000100003ccc b.ne 0x100003cc4
96+
0000000100003cd0 mov w20, #0x1e0 ; phase 1, loop 4 of 16
97+
0000000100003cd4 bl 0x100003ef4 ; symbol stub for: _rand
98+
0000000100003cd8 subs x20, x20, #0x1
99+
0000000100003cdc b.ne 0x100003cd4
100+
0000000100003ce0 mov w20, #0x1e0 ; phase 1, loop 5 of 16
101+
0000000100003ce4 bl 0x100003ef4 ; symbol stub for: _rand
102+
0000000100003ce8 subs x20, x20, #0x1
103+
0000000100003cec b.ne 0x100003ce4
104+
0000000100003cf0 mov w20, #0x1e0 ; phase 1, loop 6 of 16
105+
0000000100003cf4 bl 0x100003ef4 ; symbol stub for: _rand
106+
0000000100003cf8 subs x20, x20, #0x1
107+
0000000100003cfc b.ne 0x100003cf4
108+
0000000100003d00 mov w20, #0x1e0 ; phase 1, loop 7 of 16
109+
0000000100003d04 bl 0x100003ef4 ; symbol stub for: _rand
110+
0000000100003d08 subs x20, x20, #0x1
111+
0000000100003d0c b.ne 0x100003d04
112+
0000000100003d10 mov w20, #0x1e0 ; phase 1, loop 8 of 16
113+
0000000100003d14 bl 0x100003ef4 ; symbol stub for: _rand
114+
0000000100003d18 subs x20, x20, #0x1
115+
0000000100003d1c b.ne 0x100003d14
116+
0000000100003d20 mov w20, #0x1e0 ; phase 1, loop 9 of 16
117+
0000000100003d24 bl 0x100003ef4 ; symbol stub for: _rand
118+
0000000100003d28 subs x20, x20, #0x1
119+
0000000100003d2c b.ne 0x100003d24
120+
0000000100003d30 mov w20, #0x1e0 ; phase 1, loop 10 of 16
121+
0000000100003d34 bl 0x100003ef4 ; symbol stub for: _rand
122+
0000000100003d38 subs x20, x20, #0x1
123+
0000000100003d3c b.ne 0x100003d34
124+
0000000100003d40 mov w20, #0x1e0 ; phase 1, loop 11 of 16
125+
0000000100003d44 bl 0x100003ef4 ; symbol stub for: _rand
126+
0000000100003d48 subs x20, x20, #0x1
127+
0000000100003d4c b.ne 0x100003d44
128+
0000000100003d50 mov w20, #0x1e0 ; phase 1, loop 12 of 16
129+
0000000100003d54 bl 0x100003ef4 ; symbol stub for: _rand
130+
0000000100003d58 subs x20, x20, #0x1
131+
0000000100003d5c b.ne 0x100003d54
132+
0000000100003d60 mov w20, #0x1e0 ; phase 1, loop 13 of 16
133+
0000000100003d64 bl 0x100003ef4 ; symbol stub for: _rand
134+
0000000100003d68 subs x20, x20, #0x1
135+
0000000100003d6c b.ne 0x100003d64
136+
0000000100003d70 mov w20, #0x1e0 ; phase 1, loop 14 of 16
137+
0000000100003d74 bl 0x100003ef4 ; symbol stub for: _rand
138+
0000000100003d78 subs x20, x20, #0x1
139+
0000000100003d7c b.ne 0x100003d74
140+
0000000100003d80 mov w20, #0x1e0 ; phase 1, loop 15 of 16
141+
0000000100003d84 bl 0x100003ef4 ; symbol stub for: _rand
142+
0000000100003d88 subs x20, x20, #0x1
143+
0000000100003d8c b.ne 0x100003d84
144+
0000000100003d90 mov w20, #0x1e0 ; phase 1, loop 16 of 16
145+
0000000100003d94 bl 0x100003ef4 ; symbol stub for: _rand
146+
0000000100003d98 subs x20, x20, #0x1
147+
0000000100003d9c b.ne 0x100003d94
148+
0000000100003da0 scvtf d0, x19 ; Latency: 10 ; start: 64-bit field -> double (x19 still holds it)
149+
0000000100003da4 fadd d9, d9, d0 ; Latency: 5 ; d9 = complete start time
150+
0000000100003da8 mov w19, #0x200 ; phase 2: x19 = 512 iterations, 16 unrolled _rand calls each
151+
0000000100003dac bl 0x100003ef4 ; symbol stub for: _rand
152+
0000000100003db0 bl 0x100003ef4 ; symbol stub for: _rand
153+
0000000100003db4 bl 0x100003ef4 ; symbol stub for: _rand
154+
0000000100003db8 bl 0x100003ef4 ; symbol stub for: _rand
155+
0000000100003dbc bl 0x100003ef4 ; symbol stub for: _rand
156+
0000000100003dc0 bl 0x100003ef4 ; symbol stub for: _rand
157+
0000000100003dc4 bl 0x100003ef4 ; symbol stub for: _rand
158+
0000000100003dc8 bl 0x100003ef4 ; symbol stub for: _rand
159+
0000000100003dcc bl 0x100003ef4 ; symbol stub for: _rand
160+
0000000100003dd0 bl 0x100003ef4 ; symbol stub for: _rand
161+
0000000100003dd4 bl 0x100003ef4 ; symbol stub for: _rand
162+
0000000100003dd8 bl 0x100003ef4 ; symbol stub for: _rand
163+
0000000100003ddc bl 0x100003ef4 ; symbol stub for: _rand
164+
0000000100003de0 bl 0x100003ef4 ; symbol stub for: _rand
165+
0000000100003de4 bl 0x100003ef4 ; symbol stub for: _rand
166+
0000000100003de8 bl 0x100003ef4 ; symbol stub for: _rand
167+
0000000100003dec subs x19, x19, #0x1
168+
0000000100003df0 b.ne 0x100003dac ; falls through with x19 == 0, which phase 3 uses as its starting index
169+
0000000100003df4 mov w20, #0x1e0 ; phase 3 outer-loop head: 480 _rand calls per outer iteration
170+
0000000100003df8 bl 0x100003ef4 ; symbol stub for: _rand
171+
0000000100003dfc subs x20, x20, #0x1
172+
0000000100003e00 b.ne 0x100003df8
173+
0000000100003e04 add x19, x19, #0x1 ; next outer iteration
174+
0000000100003e08 cmp x19, #0x200
175+
0000000100003e0c b.ne 0x100003df4 ; 512 outer iterations in total
176+
0000000100003e10 add x0, sp, #0x8 ; arg 0: buffer at sp+0x8 (same buffers as the first call)
177+
0000000100003e14 add x1, sp, #0x18 ; arg 1: buffer at sp+0x18
178+
0000000100003e18 bl 0x100003edc ; symbol stub for: _gettimeofday
179+
0000000100003e1c cbz w0, 0x100003e30 ; return value 0: skip the error report
180+
0000000100003e20 str x0, [sp] ; Latency: 4 ; non-zero return code goes to [sp] as the %d argument
181+
0000000100003e24 adr x0, #0x144 ; literal pool for: "Error return from gettimeofday: %d"
182+
0000000100003e28 nop
183+
0000000100003e2c bl 0x100003ee8 ; symbol stub for: _printf
184+
0000000100003e30 ldr d0, [sp, #0x8] ; Latency: 4 ; end: raw 64-bit integer from sp+0x8
185+
0000000100003e34 scvtf d0, d0 ; Latency: 2 ; signed 64-bit integer -> double
186+
0000000100003e38 ldr s1, [sp, #0x10] ; Latency: 4 ; end: raw 32-bit integer from sp+0x10
187+
0000000100003e3c sshll.2d v1, v1, #0x0 ; Latency: 2 ; sign-extend the 32-bit lane to 64 bits
188+
0000000100003e40 scvtf d1, d1 ; Latency: 2 ; signed 64-bit integer -> double
189+
0000000100003e44 fmul d1, d1, d8 ; Latency: 5 ; scale with the constant loaded at 0x100003c98
190+
0000000100003e48 fadd d8, d1, d0 ; Latency: 5 ; d8 = complete end time (overwrites the constant)
191+
0000000100003e4c adr x0, #0x13f ; literal pool for: "FILE_NAME"
192+
0000000100003e50 nop
193+
0000000100003e54 adr x1, #0x141 ; literal pool for: "w"
194+
0000000100003e58 nop
195+
0000000100003e5c bl 0x100003ec4 ; symbol stub for: _fopen
196+
0000000100003e60 mov x19, x0 ; Latency: 2 ; keep the FILE* for _fclose; x0 still holds it for _fprintf
197+
0000000100003e64 fsub d0, d8, d9 ; Latency: 5 ; elapsed = end - start
198+
0000000100003e68 mov x8, #0x700000000000 ; build double 0x41c7700000000000 ...
199+
0000000100003e6c movk x8, #0x41c7, lsl #48 ; ... = 786432000.0 (= 2 * 480 * 512 * 16 * 100)
200+
0000000100003e70 fmov d1, x8 ; Latency: 5
201+
0000000100003e74 fdiv d0, d1, d0 ; Latency: 17 ; d0 = 786432000.0 / elapsed
202+
0000000100003e78 mov x8, #0xcd6500000000 ; build double 0x41cdcd6500000000 ...
203+
0000000100003e7c movk x8, #0x41cd, lsl #48 ; ... = 1.0e9
204+
0000000100003e80 fmov d1, x8 ; Latency: 5
205+
0000000100003e84 fdiv d0, d0, d1 ; Latency: 17 ; d0 = d0 / 1.0e9
206+
0000000100003e88 str d0, [sp] ; Latency: 4 ; result goes to [sp] as the %0.2lf argument
207+
0000000100003e8c adr x1, #0x10b ; literal pool for: "%0.2lf GFLOPS\n"
208+
0000000100003e90 nop
209+
0000000100003e94 bl 0x100003ed0 ; symbol stub for: _fprintf
210+
0000000100003e98 mov x0, x19 ; Latency: 2 ; arg 0: the FILE* saved after _fopen
211+
0000000100003e9c bl 0x100003eb8 ; symbol stub for: _fclose
212+
0000000100003ea0 mov w0, #0x0 ; return 0
213+
0000000100003ea4 ldp x29, x30, [sp, #0x40] ; Latency: 4 ; restore frame pointer and link register
214+
0000000100003ea8 ldp x20, x19, [sp, #0x30] ; Latency: 4
215+
0000000100003eac ldp d9, d8, [sp, #0x20] ; Latency: 4
216+
0000000100003eb0 add sp, sp, #0x50 ; release the frame
217+
0000000100003eb4 ret

0 commit comments

Comments
 (0)
0