|
| 1 | +b/matmul/matmul_480x512x16: |
| 2 | +(__TEXT,__text) section |
; _rtclock: read the clock via gettimeofday and return it as a double in d0.
; Takes no register arguments.
; Frame (0x30 bytes): [sp]      stack slot for the printf variadic argument
;                     [sp+0x8]  buffer passed as gettimeofday's first argument
;                     [sp+0x18] buffer passed as gettimeofday's second argument
;                     [sp+0x20] saved x29/x30
; Result = (double)(64-bit value at sp+0x8)
;        + (double)(32-bit value at sp+0x10) * K
; where K is the double literal at 0x100003f60. Its value is not visible in
; this listing; presumably 1e-6 (microseconds -> seconds) -- TODO confirm.
; On a non-zero gettimeofday return the error is printed and execution still
; falls through to the conversion below.
| 3 | +_rtclock: |
| 4 | +0000000100003b68 sub sp, sp, #0x30 |
| 5 | +0000000100003b6c stp x29, x30, [sp, #0x20] ; Latency: 6 |
| 6 | +0000000100003b70 add x29, sp, #0x20 |
; x0 = sp+0x8, x1 = x29-0x8 = sp+0x18: the two gettimeofday arguments.
| 7 | +0000000100003b74 add x0, sp, #0x8 |
| 8 | +0000000100003b78 sub x1, x29, #0x8 |
| 9 | +0000000100003b7c bl 0x100003edc ; symbol stub for: _gettimeofday |
; Zero return: skip the error report.
| 10 | +0000000100003b80 cbz w0, 0x100003b94 |
; Non-zero return: place it at [sp] as the stack-passed "%d" argument and print.
| 11 | +0000000100003b84 str x0, [sp] ; Latency: 4 |
| 12 | +0000000100003b88 adr x0, #0x3e0 ; literal pool for: "Error return from gettimeofday: %d" |
| 13 | +0000000100003b8c nop |
| 14 | +0000000100003b90 bl 0x100003ee8 ; symbol stub for: _printf |
; d0 = (double) signed 64-bit value at sp+0x8 (presumably tv_sec -- TODO confirm).
| 15 | +0000000100003b94 ldr d0, [sp, #0x8] ; Latency: 4 |
| 16 | +0000000100003b98 scvtf d0, d0 ; Latency: 2 |
; d1 = (double) signed 32-bit value at sp+0x10, sign-extended to 64 bits first
; (presumably tv_usec -- TODO confirm).
| 17 | +0000000100003b9c ldr s1, [sp, #0x10] ; Latency: 4 |
| 18 | +0000000100003ba0 sshll.2d v1, v1, #0x0 ; Latency: 2 |
| 19 | +0000000100003ba4 scvtf d1, d1 ; Latency: 2 |
| 20 | +0000000100003ba8 nop |
; d0 = d1 * K + d0, with K loaded from the literal at 0x100003f60.
| 21 | +0000000100003bac ldr d2, 0x100003f60 ; Latency: 4 |
| 22 | +0000000100003bb0 fmul d1, d1, d2 ; Latency: 5 |
| 23 | +0000000100003bb4 fadd d0, d1, d0 ; Latency: 5 |
; Restore frame record, release the frame, return with the result in d0.
| 24 | +0000000100003bb8 ldp x29, x30, [sp, #0x20] ; Latency: 4 |
| 25 | +0000000100003bbc add sp, sp, #0x30 |
| 26 | +0000000100003bc0 ret |
; _init_matrix: fill a buffer of 32-bit floats with scaled rand() values.
; In:  x0 = destination pointer
;      w1 = inner (per-row) element count
;      w2 = outer (row) count
; Returns without storing anything if w2 < 1 or w1 < 1 (signed compares).
; Each element stored is (float)rand() * 2^-31; 0x30000000 is the IEEE-754
; single-precision bit pattern for 2^-31.
; Rows start w1*4 bytes apart, so the elements are written back to back.
; Register roles inside the loops:
;   x19 = start of current row      x20 = row index
;   x21 = w1 (zero-extended)        x22 = w2 (zero-extended)
;   x23 = row size in bytes (x21<<2) w24 = scale factor bit pattern
;   x25 = elements left in row      x26 = store cursor
| 27 | +_init_matrix: |
; Prologue: save x19-x26 and the frame record in a 0x50-byte frame.
| 28 | +0000000100003bc4 stp x26, x25, [sp, #-0x50]! ; Latency: 6 |
| 29 | +0000000100003bc8 stp x24, x23, [sp, #0x10] ; Latency: 6 |
| 30 | +0000000100003bcc stp x22, x21, [sp, #0x20] ; Latency: 6 |
| 31 | +0000000100003bd0 stp x20, x19, [sp, #0x30] ; Latency: 6 |
| 32 | +0000000100003bd4 stp x29, x30, [sp, #0x40] ; Latency: 6 |
| 33 | +0000000100003bd8 add x29, sp, #0x40 |
; Guard: nothing to do when either count is below 1; jump to the epilogue.
| 34 | +0000000100003bdc cmp w2, #0x1 |
| 35 | +0000000100003be0 b.lt 0x100003c38 |
| 36 | +0000000100003be4 cmp w1, #0x1 |
| 37 | +0000000100003be8 b.lt 0x100003c38 |
; Move the arguments into callee-saved registers so they survive the rand calls.
| 38 | +0000000100003bec mov x19, x0 ; Latency: 2 |
| 39 | +0000000100003bf0 mov x20, #0x0 |
| 40 | +0000000100003bf4 mov w21, w1 ; Latency: 2 |
| 41 | +0000000100003bf8 mov w22, w2 ; Latency: 2 |
| 42 | +0000000100003bfc lsl x23, x21, #2 |
| 43 | +0000000100003c00 mov w24, #0x30000000 |
; Outer loop head (0x100003c04): reset the per-row counter and store cursor.
| 44 | +0000000100003c04 mov x25, x21 ; Latency: 2 |
| 45 | +0000000100003c08 mov x26, x19 ; Latency: 2 |
; Inner loop (0x100003c0c): *cursor++ = (float)rand() * 2^-31.
| 46 | +0000000100003c0c bl 0x100003ef4 ; symbol stub for: _rand |
| 47 | +0000000100003c10 scvtf s0, w0 ; Latency: 10 |
| 48 | +0000000100003c14 fmov s1, w24 ; Latency: 5 |
| 49 | +0000000100003c18 fmul s0, s0, s1 ; Latency: 4 |
| 50 | +0000000100003c1c str s0, [x26], #0x4 ; Latency: 4 |
| 51 | +0000000100003c20 subs x25, x25, #0x1 |
| 52 | +0000000100003c24 b.ne 0x100003c0c |
; Next row: bump the row index, advance the row base by one row of bytes.
| 53 | +0000000100003c28 add x20, x20, #0x1 |
| 54 | +0000000100003c2c add x19, x19, x23 ; Latency: 2 |
| 55 | +0000000100003c30 cmp x20, x22 ; Latency: 2 |
| 56 | +0000000100003c34 b.ne 0x100003c04 |
; Epilogue (also the early-exit target): restore saved registers and return.
| 57 | +0000000100003c38 ldp x29, x30, [sp, #0x40] ; Latency: 4 |
| 58 | +0000000100003c3c ldp x20, x19, [sp, #0x30] ; Latency: 4 |
| 59 | +0000000100003c40 ldp x22, x21, [sp, #0x20] ; Latency: 4 |
| 60 | +0000000100003c44 ldp x24, x23, [sp, #0x10] ; Latency: 4 |
| 61 | +0000000100003c48 ldp x26, x25, [sp], #0x50 ; Latency: 4 |
| 62 | +0000000100003c4c ret |
; _main: time a sequence of rand() calls with gettimeofday and write a
; "GFLOPS" figure to a file. Returns 0 in w0.
; Visible sequence:
;   gettimeofday -> 16*480 + 512*16 + 512*480 calls to rand (results discarded)
;   -> gettimeofday -> fopen("FILE_NAME", "w")
;   -> fprintf(file, "%0.2lf GFLOPS\n", 786432000.0 / elapsed / 1e9)
;   -> fclose -> return 0
; NOTE(review): no multiply/accumulate loop and no matrix stores appear between
; the two gettimeofday calls in this listing; the only timed work is the rand
; calls. Presumably the compiler removed the matrix work because its results
; are unused -- confirm against the C source, since the reported figure would
; then not measure a matrix multiply.
; Frame (0x50 bytes): [sp] printf/fprintf variadic slot, [sp+0x8] and [sp+0x18]
; gettimeofday buffers, [sp+0x20] d9/d8, [sp+0x30] x20/x19, [sp+0x40] x29/x30.
; Callee-saved register use:
;   d8  = literal K from 0x100003f60, later the end time
;   d9  = start time
;   x19 = start seconds, then loop counter, then the fopen result
;   x20 = inner loop counter
| 63 | +_main: |
| 64 | +0000000100003c50 sub sp, sp, #0x50 |
| 65 | +0000000100003c54 stp d9, d8, [sp, #0x20] ; Latency: 6 |
| 66 | +0000000100003c58 stp x20, x19, [sp, #0x30] ; Latency: 6 |
| 67 | +0000000100003c5c stp x29, x30, [sp, #0x40] ; Latency: 6 |
| 68 | +0000000100003c60 add x29, sp, #0x40 |
; First timestamp: gettimeofday(sp+0x8, sp+0x18); print the return value if non-zero.
| 69 | +0000000100003c64 add x0, sp, #0x8 |
| 70 | +0000000100003c68 add x1, sp, #0x18 |
| 71 | +0000000100003c6c bl 0x100003edc ; symbol stub for: _gettimeofday |
| 72 | +0000000100003c70 cbz w0, 0x100003c84 |
| 73 | +0000000100003c74 str x0, [sp] ; Latency: 4 |
| 74 | +0000000100003c78 adr x0, #0x2f0 ; literal pool for: "Error return from gettimeofday: %d" |
| 75 | +0000000100003c7c nop |
| 76 | +0000000100003c80 bl 0x100003ee8 ; symbol stub for: _printf |
; x19 = 64-bit value at sp+0x8 (kept as an integer until after the first rand loops).
; d9  = (double)(signed 32-bit value at sp+0x10) * K, with K kept in d8 for reuse.
| 77 | +0000000100003c84 ldr x19, [sp, #0x8] ; Latency: 4 |
| 78 | +0000000100003c88 ldr s0, [sp, #0x10] ; Latency: 4 |
| 79 | +0000000100003c8c sshll.2d v0, v0, #0x0 ; Latency: 2 |
| 80 | +0000000100003c90 scvtf d0, d0 ; Latency: 2 |
| 81 | +0000000100003c94 nop |
| 82 | +0000000100003c98 ldr d8, 0x100003f60 ; Latency: 4 |
| 83 | +0000000100003c9c fmul d9, d0, d8 ; Latency: 5 |
; Sixteen back-to-back copies of the same loop: call rand 0x1e0 (480) times and
; ignore the result. Presumably an unrolled, inlined _init_matrix whose stores
; were removed -- TODO confirm.
| 84 | +0000000100003ca0 mov w20, #0x1e0 |
| 85 | +0000000100003ca4 bl 0x100003ef4 ; symbol stub for: _rand |
| 86 | +0000000100003ca8 subs x20, x20, #0x1 |
| 87 | +0000000100003cac b.ne 0x100003ca4 |
| 88 | +0000000100003cb0 mov w20, #0x1e0 |
| 89 | +0000000100003cb4 bl 0x100003ef4 ; symbol stub for: _rand |
| 90 | +0000000100003cb8 subs x20, x20, #0x1 |
| 91 | +0000000100003cbc b.ne 0x100003cb4 |
| 92 | +0000000100003cc0 mov w20, #0x1e0 |
| 93 | +0000000100003cc4 bl 0x100003ef4 ; symbol stub for: _rand |
| 94 | +0000000100003cc8 subs x20, x20, #0x1 |
| 95 | +0000000100003ccc b.ne 0x100003cc4 |
| 96 | +0000000100003cd0 mov w20, #0x1e0 |
| 97 | +0000000100003cd4 bl 0x100003ef4 ; symbol stub for: _rand |
| 98 | +0000000100003cd8 subs x20, x20, #0x1 |
| 99 | +0000000100003cdc b.ne 0x100003cd4 |
| 100 | +0000000100003ce0 mov w20, #0x1e0 |
| 101 | +0000000100003ce4 bl 0x100003ef4 ; symbol stub for: _rand |
| 102 | +0000000100003ce8 subs x20, x20, #0x1 |
| 103 | +0000000100003cec b.ne 0x100003ce4 |
| 104 | +0000000100003cf0 mov w20, #0x1e0 |
| 105 | +0000000100003cf4 bl 0x100003ef4 ; symbol stub for: _rand |
| 106 | +0000000100003cf8 subs x20, x20, #0x1 |
| 107 | +0000000100003cfc b.ne 0x100003cf4 |
| 108 | +0000000100003d00 mov w20, #0x1e0 |
| 109 | +0000000100003d04 bl 0x100003ef4 ; symbol stub for: _rand |
| 110 | +0000000100003d08 subs x20, x20, #0x1 |
| 111 | +0000000100003d0c b.ne 0x100003d04 |
| 112 | +0000000100003d10 mov w20, #0x1e0 |
| 113 | +0000000100003d14 bl 0x100003ef4 ; symbol stub for: _rand |
| 114 | +0000000100003d18 subs x20, x20, #0x1 |
| 115 | +0000000100003d1c b.ne 0x100003d14 |
| 116 | +0000000100003d20 mov w20, #0x1e0 |
| 117 | +0000000100003d24 bl 0x100003ef4 ; symbol stub for: _rand |
| 118 | +0000000100003d28 subs x20, x20, #0x1 |
| 119 | +0000000100003d2c b.ne 0x100003d24 |
| 120 | +0000000100003d30 mov w20, #0x1e0 |
| 121 | +0000000100003d34 bl 0x100003ef4 ; symbol stub for: _rand |
| 122 | +0000000100003d38 subs x20, x20, #0x1 |
| 123 | +0000000100003d3c b.ne 0x100003d34 |
| 124 | +0000000100003d40 mov w20, #0x1e0 |
| 125 | +0000000100003d44 bl 0x100003ef4 ; symbol stub for: _rand |
| 126 | +0000000100003d48 subs x20, x20, #0x1 |
| 127 | +0000000100003d4c b.ne 0x100003d44 |
| 128 | +0000000100003d50 mov w20, #0x1e0 |
| 129 | +0000000100003d54 bl 0x100003ef4 ; symbol stub for: _rand |
| 130 | +0000000100003d58 subs x20, x20, #0x1 |
| 131 | +0000000100003d5c b.ne 0x100003d54 |
| 132 | +0000000100003d60 mov w20, #0x1e0 |
| 133 | +0000000100003d64 bl 0x100003ef4 ; symbol stub for: _rand |
| 134 | +0000000100003d68 subs x20, x20, #0x1 |
| 135 | +0000000100003d6c b.ne 0x100003d64 |
| 136 | +0000000100003d70 mov w20, #0x1e0 |
| 137 | +0000000100003d74 bl 0x100003ef4 ; symbol stub for: _rand |
| 138 | +0000000100003d78 subs x20, x20, #0x1 |
| 139 | +0000000100003d7c b.ne 0x100003d74 |
| 140 | +0000000100003d80 mov w20, #0x1e0 |
| 141 | +0000000100003d84 bl 0x100003ef4 ; symbol stub for: _rand |
| 142 | +0000000100003d88 subs x20, x20, #0x1 |
| 143 | +0000000100003d8c b.ne 0x100003d84 |
| 144 | +0000000100003d90 mov w20, #0x1e0 |
| 145 | +0000000100003d94 bl 0x100003ef4 ; symbol stub for: _rand |
| 146 | +0000000100003d98 subs x20, x20, #0x1 |
| 147 | +0000000100003d9c b.ne 0x100003d94 |
; Finish the start time: d9 = d9 + (double)x19. x19 is free for reuse afterwards.
| 148 | +0000000100003da0 scvtf d0, x19 ; Latency: 10 |
| 149 | +0000000100003da4 fadd d9, d9, d0 ; Latency: 5 |
; Second group: 0x200 (512) iterations, each making 16 rand calls, results ignored.
; x19 counts down and is exactly 0 when this loop exits.
| 150 | +0000000100003da8 mov w19, #0x200 |
| 151 | +0000000100003dac bl 0x100003ef4 ; symbol stub for: _rand |
| 152 | +0000000100003db0 bl 0x100003ef4 ; symbol stub for: _rand |
| 153 | +0000000100003db4 bl 0x100003ef4 ; symbol stub for: _rand |
| 154 | +0000000100003db8 bl 0x100003ef4 ; symbol stub for: _rand |
| 155 | +0000000100003dbc bl 0x100003ef4 ; symbol stub for: _rand |
| 156 | +0000000100003dc0 bl 0x100003ef4 ; symbol stub for: _rand |
| 157 | +0000000100003dc4 bl 0x100003ef4 ; symbol stub for: _rand |
| 158 | +0000000100003dc8 bl 0x100003ef4 ; symbol stub for: _rand |
| 159 | +0000000100003dcc bl 0x100003ef4 ; symbol stub for: _rand |
| 160 | +0000000100003dd0 bl 0x100003ef4 ; symbol stub for: _rand |
| 161 | +0000000100003dd4 bl 0x100003ef4 ; symbol stub for: _rand |
| 162 | +0000000100003dd8 bl 0x100003ef4 ; symbol stub for: _rand |
| 163 | +0000000100003ddc bl 0x100003ef4 ; symbol stub for: _rand |
| 164 | +0000000100003de0 bl 0x100003ef4 ; symbol stub for: _rand |
| 165 | +0000000100003de4 bl 0x100003ef4 ; symbol stub for: _rand |
| 166 | +0000000100003de8 bl 0x100003ef4 ; symbol stub for: _rand |
| 167 | +0000000100003dec subs x19, x19, #0x1 |
| 168 | +0000000100003df0 b.ne 0x100003dac |
; Third group: outer loop over x19 = 0..0x1ff (starting from the 0 left by the
; countdown above), inner loop of 0x1e0 (480) rand calls, results ignored.
| 169 | +0000000100003df4 mov w20, #0x1e0 |
| 170 | +0000000100003df8 bl 0x100003ef4 ; symbol stub for: _rand |
| 171 | +0000000100003dfc subs x20, x20, #0x1 |
| 172 | +0000000100003e00 b.ne 0x100003df8 |
| 173 | +0000000100003e04 add x19, x19, #0x1 |
| 174 | +0000000100003e08 cmp x19, #0x200 |
| 175 | +0000000100003e0c b.ne 0x100003df4 |
; Second timestamp, same buffers and same error report as the first.
| 176 | +0000000100003e10 add x0, sp, #0x8 |
| 177 | +0000000100003e14 add x1, sp, #0x18 |
| 178 | +0000000100003e18 bl 0x100003edc ; symbol stub for: _gettimeofday |
| 179 | +0000000100003e1c cbz w0, 0x100003e30 |
| 180 | +0000000100003e20 str x0, [sp] ; Latency: 4 |
| 181 | +0000000100003e24 adr x0, #0x144 ; literal pool for: "Error return from gettimeofday: %d" |
| 182 | +0000000100003e28 nop |
| 183 | +0000000100003e2c bl 0x100003ee8 ; symbol stub for: _printf |
; End time: d8 = (double)(32-bit value at sp+0x10) * K + (double)(64-bit value at sp+0x8).
; d8 held K until the fmul; the fadd overwrites it with the end time.
| 184 | +0000000100003e30 ldr d0, [sp, #0x8] ; Latency: 4 |
| 185 | +0000000100003e34 scvtf d0, d0 ; Latency: 2 |
| 186 | +0000000100003e38 ldr s1, [sp, #0x10] ; Latency: 4 |
| 187 | +0000000100003e3c sshll.2d v1, v1, #0x0 ; Latency: 2 |
| 188 | +0000000100003e40 scvtf d1, d1 ; Latency: 2 |
| 189 | +0000000100003e44 fmul d1, d1, d8 ; Latency: 5 |
| 190 | +0000000100003e48 fadd d8, d1, d0 ; Latency: 5 |
; fopen("FILE_NAME", "w"); the result is kept in x19 for fclose.
; NOTE(review): the path is the literal text "FILE_NAME" -- looks like a macro
; name that was never expanded; confirm against the C source.
; NOTE(review): the fopen result is not tested before it reaches fprintf/fclose.
| 191 | +0000000100003e4c adr x0, #0x13f ; literal pool for: "FILE_NAME" |
| 192 | +0000000100003e50 nop |
| 193 | +0000000100003e54 adr x1, #0x141 ; literal pool for: "w" |
| 194 | +0000000100003e58 nop |
| 195 | +0000000100003e5c bl 0x100003ec4 ; symbol stub for: _fopen |
| 196 | +0000000100003e60 mov x19, x0 ; Latency: 2 |
; d0 = elapsed = end - start.
| 197 | +0000000100003e64 fsub d0, d8, d9 ; Latency: 5 |
; d1 = 0x41C7700000000000 = 786432000.0 (= 2 * 480 * 512 * 16 * 100; presumably
; the floating-point operation count times a repetition count -- TODO confirm).
| 198 | +0000000100003e68 mov x8, #0x700000000000 |
| 199 | +0000000100003e6c movk x8, #0x41c7, lsl #48 |
| 200 | +0000000100003e70 fmov d1, x8 ; Latency: 5 |
| 201 | +0000000100003e74 fdiv d0, d1, d0 ; Latency: 17 |
; d1 = 0x41CDCD6500000000 = 1e9; d0 = (786432000.0 / elapsed) / 1e9.
| 202 | +0000000100003e78 mov x8, #0xcd6500000000 |
| 203 | +0000000100003e7c movk x8, #0x41cd, lsl #48 |
| 204 | +0000000100003e80 fmov d1, x8 ; Latency: 5 |
| 205 | +0000000100003e84 fdiv d0, d0, d1 ; Latency: 17 |
; fprintf: x0 still holds the fopen result, x1 = format, the double goes in the
; stack variadic slot at [sp].
| 206 | +0000000100003e88 str d0, [sp] ; Latency: 4 |
| 207 | +0000000100003e8c adr x1, #0x10b ; literal pool for: "%0.2lf GFLOPS\n" |
| 208 | +0000000100003e90 nop |
| 209 | +0000000100003e94 bl 0x100003ed0 ; symbol stub for: _fprintf |
| 210 | +0000000100003e98 mov x0, x19 ; Latency: 2 |
| 211 | +0000000100003e9c bl 0x100003eb8 ; symbol stub for: _fclose |
; Return 0 after restoring the saved registers.
| 212 | +0000000100003ea0 mov w0, #0x0 |
| 213 | +0000000100003ea4 ldp x29, x30, [sp, #0x40] ; Latency: 4 |
| 214 | +0000000100003ea8 ldp x20, x19, [sp, #0x30] ; Latency: 4 |
| 215 | +0000000100003eac ldp d9, d8, [sp, #0x20] ; Latency: 4 |
| 216 | +0000000100003eb0 add sp, sp, #0x50 |
| 217 | +0000000100003eb4 ret |
0 commit comments