Use map().to_bitmask() instead of build_bitmask() · JavaScriptExpert/simdjson@9cc4ddf · GitHub
[go: up one dir, main page]

Skip to content

Commit 9cc4ddf

Browse files
committed
Use map().to_bitmask() instead of build_bitmask()
1 parent 441963c commit 9cc4ddf

File tree

6 files changed

+76
-53
lines changed

6 files changed

+76
-53
lines changed

src/arm64/simd_input.h

Lines changed: 15 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -46,45 +46,39 @@ struct simd_input<Architecture::ARM64> {
4646
this->i3 = vld1q_u8(ptr + 48);
4747
}
4848

49-
really_inline simd_input(uint8x16_t i0, uint8x16_t i1, uint8x16_t i2, uint8x16_t i3) {
50-
this->i0 = i0;
51-
this->i1 = i1;
52-
this->i2 = i2;
53-
this->i3 = i3;
54-
}
55-
56-
template <typename F>
57-
really_inline uint64_t build_bitmask(F const& chunk_to_mask) {
58-
uint8x16_t r0 = chunk_to_mask(this->i0);
59-
uint8x16_t r1 = chunk_to_mask(this->i1);
60-
uint8x16_t r2 = chunk_to_mask(this->i2);
61-
uint8x16_t r3 = chunk_to_mask(this->i3);
62-
return neon_movemask_bulk(r0, r1, r2, r3);
49+
really_inline simd_input(uint8x16_t a0, uint8x16_t a1, uint8x16_t a2, uint8x16_t a3) {
50+
this->i0 = a0;
51+
this->i1 = a1;
52+
this->i2 = a2;
53+
this->i3 = a3;
6354
}
6455

6556
template <typename F>
6657
really_inline simd_input<Architecture::ARM64> map(F const& map_chunk) {
67-
simd_input<Architecture::ARM64> result = {
58+
return simd_input<Architecture::ARM64>(
6859
map_chunk(this->i0),
6960
map_chunk(this->i1),
7061
map_chunk(this->i2),
7162
map_chunk(this->i3)
72-
};
73-
return result;
63+
);
64+
}
65+
66+
really_inline uint64_t to_bitmask() {
67+
return neon_movemask_bulk(this->i0, this->i1, this->i2, this->i3);
7468
}
7569

7670
really_inline uint64_t eq(uint8_t m) {
7771
const uint8x16_t mask = vmovq_n_u8(m);
78-
return this->build_bitmask([&](uint8x16_t chunk) {
72+
return this->map([&](uint8x16_t chunk) {
7973
return vceqq_u8(chunk, mask);
80-
});
74+
}).to_bitmask();
8175
}
8276

8377
really_inline uint64_t lteq(uint8_t m) {
8478
const uint8x16_t mask = vmovq_n_u8(m);
85-
return this->build_bitmask([&](uint8x16_t chunk) {
79+
return this->map([&](uint8x16_t chunk) {
8680
return vcleq_u8(chunk, mask);
87-
});
81+
}).to_bitmask();
8882
}
8983

9084
}; // struct simd_input

src/arm64/stage1_find_marks.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,14 +39,14 @@ really_inline void find_whitespace_and_structurals(
3939
});
4040

4141
const uint8x16_t structural_shufti_mask = vmovq_n_u8(0x7);
42-
structurals = v.build_bitmask([&](auto chunk) {
42+
structurals = v.map([&](auto chunk) {
4343
return vtstq_u8(chunk, structural_shufti_mask);
44-
});
44+
}).to_bitmask();
4545

4646
const uint8x16_t whitespace_shufti_mask = vmovq_n_u8(0x18);
47-
whitespace = v.build_bitmask([&](auto chunk) {
47+
whitespace = v.map([&](auto chunk) {
4848
return vtstq_u8(chunk, whitespace_shufti_mask);
49-
});
49+
}).to_bitmask();
5050
}
5151

5252
#include "generic/stage1_find_marks_flatten.h"

src/haswell/simd_input.h

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,25 +18,37 @@ struct simd_input<Architecture::HASWELL> {
1818
this->hi = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr + 32));
1919
}
2020

21+
really_inline simd_input(__m256i i0, __m256i i1) {
22+
this->lo = i0;
23+
this->hi = i1;
24+
}
25+
2126
template <typename F>
22-
really_inline uint64_t build_bitmask(F const& chunk_to_mask) {
23-
uint64_t r0 = static_cast<uint32_t>(_mm256_movemask_epi8(chunk_to_mask(this->lo)));
24-
uint64_t r1 = _mm256_movemask_epi8(chunk_to_mask(this->hi));
27+
really_inline simd_input<Architecture::HASWELL> map(F const& map_chunk) {
28+
return simd_input<Architecture::HASWELL>(
29+
map_chunk(this->lo),
30+
map_chunk(this->hi)
31+
);
32+
}
33+
34+
really_inline uint64_t to_bitmask() {
35+
uint64_t r0 = static_cast<uint32_t>(_mm256_movemask_epi8(this->lo));
36+
uint64_t r1 = _mm256_movemask_epi8(this->hi);
2537
return r0 | (r1 << 32);
2638
}
2739

2840
really_inline uint64_t eq(uint8_t m) {
2941
const __m256i mask = _mm256_set1_epi8(m);
30-
return this->build_bitmask([&] (auto chunk) {
42+
return this->map([&] (auto chunk) {
3143
return _mm256_cmpeq_epi8(chunk, mask);
32-
});
44+
}).to_bitmask();
3345
}
3446

3547
really_inline uint64_t lteq(uint8_t m) {
3648
const __m256i maxval = _mm256_set1_epi8(m);
37-
return this->build_bitmask([&] (auto chunk) {
49+
return this->map([&] (auto chunk) {
3850
return _mm256_cmpeq_epi8(_mm256_max_epu8(maxval, chunk), maxval);
39-
});
51+
}).to_bitmask();
4052
}
4153

4254
}; // struct simd_input

src/haswell/stage1_find_marks.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -34,26 +34,26 @@ really_inline void find_whitespace_and_structurals(simd_input<ARCHITECTURE> in,
3434
const __m256i mask_close_bracket = _mm256_set1_epi8(0x5d);
3535
const __m256i mask_column = _mm256_set1_epi8(0x3a);
3636
const __m256i mask_comma = _mm256_set1_epi8(0x2c);
37-
structurals = in->build_bitmask([&](auto in) {
37+
structurals = in.map([&](auto in) {
3838
__m256i structurals = _mm256_cmpeq_epi8(in, mask_open_brace);
3939
structurals = _mm256_or_si256(structurals, _mm256_cmpeq_epi8(in, mask_close_brace));
4040
structurals = _mm256_or_si256(structurals, _mm256_cmpeq_epi8(in, mask_open_bracket));
4141
structurals = _mm256_or_si256(structurals, _mm256_cmpeq_epi8(in, mask_close_bracket));
4242
structurals = _mm256_or_si256(structurals, _mm256_cmpeq_epi8(in, mask_column));
4343
structurals = _mm256_or_si256(structurals, _mm256_cmpeq_epi8(in, mask_comma));
4444
return structurals;
45-
});
45+
}).to_bitmask();
4646

4747
const __m256i mask_space = _mm256_set1_epi8(0x20);
4848
const __m256i mask_linefeed = _mm256_set1_epi8(0x0a);
4949
const __m256i mask_tab = _mm256_set1_epi8(0x09);
5050
const __m256i mask_carriage = _mm256_set1_epi8(0x0d);
51-
whitespace = in->build_bitmask([&](auto in) {
51+
whitespace = in.map([&](auto in) {
5252
__m256i space = _mm256_cmpeq_epi8(in, mask_space);
5353
space = _mm256_or_si256(space, _mm256_cmpeq_epi8(in, mask_linefeed));
5454
space = _mm256_or_si256(space, _mm256_cmpeq_epi8(in, mask_tab));
5555
space = _mm256_or_si256(space, _mm256_cmpeq_epi8(in, mask_carriage));
56-
});
56+
}).to_bitmask();
5757
// end of naive approach
5858

5959
#else // SIMDJSON_NAIVE_STRUCTURAL
@@ -69,15 +69,15 @@ really_inline void find_whitespace_and_structurals(simd_input<ARCHITECTURE> in,
6969
const __m256i struct_offset = _mm256_set1_epi8(0xd4u);
7070
const __m256i struct_mask = _mm256_set1_epi8(32);
7171

72-
whitespace = in.build_bitmask([&](auto chunk) {
72+
whitespace = in.map([&](auto chunk) {
7373
return _mm256_cmpeq_epi8(chunk, _mm256_shuffle_epi8(white_table, chunk));
74-
});
75-
structurals = in.build_bitmask([&](auto chunk) {
74+
}).to_bitmask();
75+
structurals = in.map([&](auto chunk) {
7676
__m256i struct_r1 = _mm256_add_epi8(struct_offset, chunk);
7777
__m256i struct_r2 = _mm256_or_si256(chunk, struct_mask);
7878
__m256i struct_r3 = _mm256_shuffle_epi8(structural_table, struct_r1);
7979
return _mm256_cmpeq_epi8(struct_r2, struct_r3);
80-
});
80+
}).to_bitmask();
8181

8282
#endif // else SIMDJSON_NAIVE_STRUCTURAL
8383
}

src/westmere/simd_input.h

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,27 +22,44 @@ struct simd_input<Architecture::WESTMERE> {
2222
this->v3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr + 48));
2323
}
2424

25+
really_inline simd_input(__m128i i0, __m128i i1, __m128i i2, __m128i i3)
26+
{
27+
this->v0 = i0;
28+
this->v1 = i1;
29+
this->v2 = i2;
30+
this->v3 = i3;
31+
}
32+
2533
template <typename F>
26-
really_inline uint64_t build_bitmask(F const& chunk_to_mask) {
27-
uint64_t r0 = static_cast<uint32_t>(_mm_movemask_epi8(chunk_to_mask(this->v0)));
28-
uint64_t r1 = _mm_movemask_epi8(chunk_to_mask(this->v1));
29-
uint64_t r2 = _mm_movemask_epi8(chunk_to_mask(this->v2));
30-
uint64_t r3 = _mm_movemask_epi8(chunk_to_mask(this->v3));
34+
really_inline simd_input<Architecture::WESTMERE> map(F const& map_chunk) {
35+
return simd_input<Architecture::WESTMERE>(
36+
map_chunk(this->v0),
37+
map_chunk(this->v1),
38+
map_chunk(this->v2),
39+
map_chunk(this->v3)
40+
);
41+
}
42+
43+
really_inline uint64_t to_bitmask() {
44+
uint64_t r0 = static_cast<uint32_t>(_mm_movemask_epi8(this->v0));
45+
uint64_t r1 = _mm_movemask_epi8(this->v1);
46+
uint64_t r2 = _mm_movemask_epi8(this->v2);
47+
uint64_t r3 = _mm_movemask_epi8(this->v3);
3148
return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48);
3249
}
3350

3451
really_inline uint64_t eq(uint8_t m) {
3552
const __m128i mask = _mm_set1_epi8(m);
36-
return this->build_bitmask([&](auto chunk) {
53+
return this->map([&](auto chunk) {
3754
return _mm_cmpeq_epi8(chunk, mask);
38-
});
55+
}).to_bitmask();
3956
}
4057

4158
really_inline uint64_t lteq(uint8_t m) {
4259
const __m128i maxval = _mm_set1_epi8(m);
43-
return this->build_bitmask([&](auto chunk) {
60+
return this->map([&](auto chunk) {
4461
return _mm_cmpeq_epi8(_mm_max_epu8(maxval, chunk), maxval);
45-
});
62+
}).to_bitmask();
4663
}
4764

4865
}; // struct simd_input

src/westmere/stage1_find_marks.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,16 +28,16 @@ really_inline void find_whitespace_and_structurals(simd_input<ARCHITECTURE> in,
2828
const __m128i struct_offset = _mm_set1_epi8(0xd4u);
2929
const __m128i struct_mask = _mm_set1_epi8(32);
3030

31-
whitespace = in.build_bitmask([&](auto chunk) {
31+
whitespace = in.map([&](auto chunk) {
3232
return _mm_cmpeq_epi8(chunk, _mm_shuffle_epi8(white_table, chunk));
33-
});
33+
}).to_bitmask();
3434

35-
structurals = in.build_bitmask([&](auto chunk) {
35+
structurals = in.map([&](auto chunk) {
3636
__m128i struct_r1 = _mm_add_epi8(struct_offset, chunk);
3737
__m128i struct_r2 = _mm_or_si128(chunk, struct_mask);
3838
__m128i struct_r3 = _mm_shuffle_epi8(structural_table, struct_r1);
3939
return _mm_cmpeq_epi8(struct_r2, struct_r3);
40-
});
40+
}).to_bitmask();
4141
}
4242

4343
#include "generic/stage1_find_marks_flatten.h"

0 commit comments

Comments
 (0)
0