@@ -1,4 +1,4 @@
-/* auto-generated on Fri Aug 23 10:23:28 DST 2019. Do not edit! */
+/* auto-generated on Fri Aug 23 11:02:39 DST 2019. Do not edit! */
 #include "simdjson.h"
 
 /* used for http://dmalloc.com/ Dmalloc - Debug Malloc Library */
@@ -574,22 +574,38 @@ struct simd_input<Architecture::ARM64> {
     this->i3 = vld1q_u8(ptr + 48);
   }
 
+  template <typename F>
+  really_inline uint64_t build_bitmask(F const& chunk_to_mask) {
+    uint8x16_t r0 = chunk_to_mask(this->i0);
+    uint8x16_t r1 = chunk_to_mask(this->i1);
+    uint8x16_t r2 = chunk_to_mask(this->i2);
+    uint8x16_t r3 = chunk_to_mask(this->i3);
+    return neon_movemask_bulk(r0, r1, r2, r3);
+  }
+
+  template <typename F>
+  really_inline simd_input<Architecture::ARM64> map(F const& map_chunk) {
+    simd_input<Architecture::ARM64> result = {
+      map_chunk(this->i0),
+      map_chunk(this->i1),
+      map_chunk(this->i2),
+      map_chunk(this->i3)
+    };
+    return result;
+  }
+
   really_inline uint64_t eq(uint8_t m) {
     const uint8x16_t mask = vmovq_n_u8(m);
-    uint8x16_t cmp_res_0 = vceqq_u8(this->i0, mask);
-    uint8x16_t cmp_res_1 = vceqq_u8(this->i1, mask);
-    uint8x16_t cmp_res_2 = vceqq_u8(this->i2, mask);
-    uint8x16_t cmp_res_3 = vceqq_u8(this->i3, mask);
-    return neon_movemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3);
+    return this->build_bitmask([&](uint8x16_t chunk) {
+      return vceqq_u8(chunk, mask);
+    });
   }
 
   really_inline uint64_t lteq(uint8_t m) {
     const uint8x16_t mask = vmovq_n_u8(m);
-    uint8x16_t cmp_res_0 = vcleq_u8(this->i0, mask);
-    uint8x16_t cmp_res_1 = vcleq_u8(this->i1, mask);
-    uint8x16_t cmp_res_2 = vcleq_u8(this->i2, mask);
-    uint8x16_t cmp_res_3 = vcleq_u8(this->i3, mask);
-    return neon_movemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3);
+    return this->build_bitmask([&](uint8x16_t chunk) {
+      return vcleq_u8(chunk, mask);
+    });
   }
 
 }; // struct simd_input
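The two new helpers capture the pattern this kernel repeats for every 64-byte block: run one operation over each of the four 16-byte NEON registers, then either keep the four results as a fresh simd_input (map) or compress them into a 64-bit mask with one bit per input byte (build_bitmask, via neon_movemask_bulk). Below is a minimal portable sketch of the same pattern, with four 8-byte scalar chunks standing in for the uint8x16_t registers so it compiles on any target; scalar_input and the main driver are illustrative names, not part of simdjson.

// Portable analogue of the build_bitmask/map pattern above. Four 8-byte
// scalar chunks stand in for the four uint8x16_t NEON registers, and a
// plain bool stands in for the per-lane 0xFF comparison results.
#include <cstdint>
#include <cstdio>
#include <cstring>

struct scalar_input {
  uint8_t chunks[4][8]; // 32 bytes total, vs. 64 in the ARM64 simd_input

  // Collect one bit per byte: bit i of the result is set iff
  // byte_to_bool(...) is true for byte i of the input.
  template <typename F>
  uint32_t build_bitmask(F const& byte_to_bool) const {
    uint32_t mask = 0;
    for (int c = 0; c < 4; c++)
      for (int b = 0; b < 8; b++)
        mask |= uint32_t(byte_to_bool(chunks[c][b]) ? 1 : 0) << (c * 8 + b);
    return mask;
  }

  // Apply the same transform to every byte, yielding a new scalar_input.
  template <typename F>
  scalar_input map(F const& map_byte) const {
    scalar_input out;
    for (int c = 0; c < 4; c++)
      for (int b = 0; b < 8; b++)
        out.chunks[c][b] = map_byte(chunks[c][b]);
    return out;
  }
};

int main() {
  scalar_input in;
  memcpy(in.chunks, "{\"k\": 1}........................", 32);
  // eq('{') expressed through build_bitmask, as in the refactored code.
  uint32_t open_brace = in.build_bitmask([](uint8_t b) { return b == '{'; });
  printf("%08x\n", (unsigned)open_brace); // prints 00000001: '{' is byte 0
  return 0;
}

Because the functor is a template parameter and the call sites are tiny lambdas, the refactored eq and lteq should inline back to the same four comparisons plus one neon_movemask_bulk as the hand-unrolled originals.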
@@ -1467,45 +1483,25 @@ really_inline void find_whitespace_and_structurals(
       (uint8x16_t){16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0};
   const uint8x16_t high_nibble_mask =
       (uint8x16_t){8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0};
-  const uint8x16_t structural_shufti_mask = vmovq_n_u8(0x7);
-  const uint8x16_t whitespace_shufti_mask = vmovq_n_u8(0x18);
   const uint8x16_t low_nib_and_mask = vmovq_n_u8(0xf);
 
-  uint8x16_t nib_0_lo = vandq_u8(in.i0, low_nib_and_mask);
-  uint8x16_t nib_0_hi = vshrq_n_u8(in.i0, 4);
-  uint8x16_t shuf_0_lo = vqtbl1q_u8(low_nibble_mask, nib_0_lo);
-  uint8x16_t shuf_0_hi = vqtbl1q_u8(high_nibble_mask, nib_0_hi);
-  uint8x16_t v_0 = vandq_u8(shuf_0_lo, shuf_0_hi);
-
-  uint8x16_t nib_1_lo = vandq_u8(in.i1, low_nib_and_mask);
-  uint8x16_t nib_1_hi = vshrq_n_u8(in.i1, 4);
-  uint8x16_t shuf_1_lo = vqtbl1q_u8(low_nibble_mask, nib_1_lo);
-  uint8x16_t shuf_1_hi = vqtbl1q_u8(high_nibble_mask, nib_1_hi);
-  uint8x16_t v_1 = vandq_u8(shuf_1_lo, shuf_1_hi);
-
-  uint8x16_t nib_2_lo = vandq_u8(in.i2, low_nib_and_mask);
-  uint8x16_t nib_2_hi = vshrq_n_u8(in.i2, 4);
-  uint8x16_t shuf_2_lo = vqtbl1q_u8(low_nibble_mask, nib_2_lo);
-  uint8x16_t shuf_2_hi = vqtbl1q_u8(high_nibble_mask, nib_2_hi);
-  uint8x16_t v_2 = vandq_u8(shuf_2_lo, shuf_2_hi);
-
-  uint8x16_t nib_3_lo = vandq_u8(in.i3, low_nib_and_mask);
-  uint8x16_t nib_3_hi = vshrq_n_u8(in.i3, 4);
-  uint8x16_t shuf_3_lo = vqtbl1q_u8(low_nibble_mask, nib_3_lo);
-  uint8x16_t shuf_3_hi = vqtbl1q_u8(high_nibble_mask, nib_3_hi);
-  uint8x16_t v_3 = vandq_u8(shuf_3_lo, shuf_3_hi);
-
-  uint8x16_t tmp_0 = vtstq_u8(v_0, structural_shufti_mask);
-  uint8x16_t tmp_1 = vtstq_u8(v_1, structural_shufti_mask);
-  uint8x16_t tmp_2 = vtstq_u8(v_2, structural_shufti_mask);
-  uint8x16_t tmp_3 = vtstq_u8(v_3, structural_shufti_mask);
-  structurals = neon_movemask_bulk(tmp_0, tmp_1, tmp_2, tmp_3);
-
-  uint8x16_t tmp_ws_0 = vtstq_u8(v_0, whitespace_shufti_mask);
-  uint8x16_t tmp_ws_1 = vtstq_u8(v_1, whitespace_shufti_mask);
-  uint8x16_t tmp_ws_2 = vtstq_u8(v_2, whitespace_shufti_mask);
-  uint8x16_t tmp_ws_3 = vtstq_u8(v_3, whitespace_shufti_mask);
-  whitespace = neon_movemask_bulk(tmp_ws_0, tmp_ws_1, tmp_ws_2, tmp_ws_3);
+  simd_input<ARCHITECTURE> v = in.map([&](auto chunk) {
+    uint8x16_t nib_lo = vandq_u8(chunk, low_nib_and_mask);
+    uint8x16_t nib_hi = vshrq_n_u8(chunk, 4);
+    uint8x16_t shuf_lo = vqtbl1q_u8(low_nibble_mask, nib_lo);
+    uint8x16_t shuf_hi = vqtbl1q_u8(high_nibble_mask, nib_hi);
+    return vandq_u8(shuf_lo, shuf_hi);
+  });
+
+  const uint8x16_t structural_shufti_mask = vmovq_n_u8(0x7);
+  structurals = v.build_bitmask([&](auto chunk) {
+    return vtstq_u8(chunk, structural_shufti_mask);
+  });
+
+  const uint8x16_t whitespace_shufti_mask = vmovq_n_u8(0x18);
+  whitespace = v.build_bitmask([&](auto chunk) {
+    return vtstq_u8(chunk, whitespace_shufti_mask);
+  });
 }
 
 // This file contains a non-architecture-specific version of "flatten" used in stage1.
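The rewritten body keeps the original vqtbl1q_u8 "shufti" classification: each byte's low nibble indexes low_nibble_mask, its high nibble indexes high_nibble_mask, the two lookups are ANDed, and vtstq_u8 tests the product against 0x7 (structural) or 0x18 (whitespace). The scalar walk-through below uses the same two tables to classify a few concrete bytes and runs on any target; the classify helper and main driver are illustrative only, not simdjson code.

// Scalar walk-through of the nibble-lookup trick above: vqtbl1q_u8 performs
// this per-byte table lookup for 16 bytes at once, and vtstq_u8 performs the
// per-byte "(value & mask) != 0" test.
#include <cstdint>
#include <cstdio>

static const uint8_t low_nibble_mask[16] =
    {16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0};
static const uint8_t high_nibble_mask[16] =
    {8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0};

static uint8_t classify(uint8_t byte) {
  return low_nibble_mask[byte & 0xf] & high_nibble_mask[byte >> 4];
}

int main() {
  // Example: ',' is 0x2C, so low table[0xC] = 2 and high table[0x2] = 18;
  // 2 & 18 = 2, and 2 & 0x07 != 0, so the comma is structural.
  const char *input = ",:{}[] \t\r\nx";
  for (const char *p = input; *p; p++) {
    uint8_t v = classify((uint8_t)*p);
    printf("0x%02x structural=%d whitespace=%d\n",
           (uint8_t)*p, (v & 0x07) != 0, (v & 0x18) != 0);
  }
  return 0;
}

Hoisting structural_shufti_mask and whitespace_shufti_mask down next to their build_bitmask calls narrows their scope without changing the values tested.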