8000 faster minifier (#568) · JavaScriptExpert/simdjson@5d1e3ef · GitHub
[go: up one dir, main page]

Skip to content

Commit 5d1e3ef

Browse files
lemirejkeiser
andauthored
faster minifier (simdjson#568)
* Fallback should use our scalar code. * parse should have a nicer error message. * Making it so that "minify" can use different architectures. * Let us change the minifier competition so that it tests all implementations. * Documenting the untaken optimization opportunity. Co-authored-by: John Keiser <john@johnkeiser.com>
1 parent 293ec7a commit 5d1e3ef
Collapse file tree

34 files changed

+558
-620
lines changed

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,15 +53,15 @@ endif # ifeq ($(SANITIZE),1)
5353
endif # ifeq ($(MEMSANITIZE),1)
5454

5555
# Headers and sources
56-
SRCHEADERS_GENERIC=src/generic/atomparsing.h src/generic/numberparsing.h src/generic/json_scanner.h src/generic/json_string_scanner.h src/generic/json_structural_indexer.h src/generic/stage2_build_tape.h src/generic/stringparsing.h src/generic/stage2_streaming_build_tape.h src/generic/utf8_fastvalidate_algorithm.h src/generic/utf8_lookup_algorithm.h src/generic/utf8_lookup2_algorithm.h src/generic/utf8_range_algorithm.h src/generic/utf8_zwegner_algorithm.h
56+
SRCHEADERS_GENERIC=src/generic/atomparsing.h src/generic/numberparsing.h src/generic/json_scanner.h src/generic/json_string_scanner.h src/generic/json_structural_indexer.h src/generic/json_minifier.h src/generic/buf_block_reader.h src/generic/stage2_build_tape.h src/generic/stringparsing.h src/generic/stage2_streaming_build_tape.h src/generic/utf8_fastvalidate_algorithm.h src/generic/utf8_lookup_algorithm.h src/generic/utf8_lookup2_algorithm.h src/generic/utf8_range_algorithm.h src/generic/utf8_zwegner_algorithm.h
5757
SRCHEADERS_ARM64= src/arm64/bitmanipulation.h src/arm64/bitmask.h src/arm64/intrinsics.h src/arm64/numberparsing.h src/arm64/simd.h src/arm64/stage1_find_marks.h src/arm64/stage2_build_tape.h src/arm64/stringparsing.h
5858
SRCHEADERS_HASWELL= src/haswell/bitmanipulation.h src/haswell/bitmask.h src/haswell/intrinsics.h src/haswell/numberparsing.h src/haswell/simd.h src/haswell/stage1_find_marks.h src/haswell/stage2_build_tape.h src/haswell/stringparsing.h
5959
SRCHEADERS_FALLBACK= src/fallback/bitmanipulation.h src/fallback/implementation.h src/fallback/numberparsing.h src/fallback/stage1_find_marks.h src/fallback/stage2_build_tape.h src/fallback/stringparsing.h
6060
SRCHEADERS_WESTMERE=src/westmere/bitmanipulation.h src/westmere/bitmask.h src/westmere/intrinsics.h src/westmere/numberparsing.h src/westmere/simd.h src/westmere/stage1_find_marks.h src/westmere/stage2_build_tape.h src/westmere/stringparsing.h
6161
SRCHEADERS_SRC=src/isadetection.h src/jsoncharutils.h src/simdprune_tables.h src/implementation.cpp src/stage1_find_marks.cpp src/stage2_build_tape.cpp src/document_parser_callbacks.h
6262
SRCHEADERS=$(SRCHEADERS_SRC) $(SRCHEADERS_GENERIC) $(SRCHEADERS_ARM64) $(SRCHEADERS_HASWELL) $(SRCHEADERS_WESTMERE) $(SRCHEADERS_FALLBACK)
6363

64-
INCLUDEHEADERS=include/simdjson.h include/simdjson/common_defs.h include/simdjson/internal/jsonformatutils.h include/simdjson/jsonioutil.h include/simdjson/jsonminifier.h include/simdjson/jsonparser.h include/simdjson/padded_string.h include/simdjson/inline/padded_string.h include/simdjson/document.h include/simdjson/inline/document.h include/simdjson/document_iterator.h include/simdjson/inline/document_iterator.h include/simdjson/document_stream.h include/simdjson/inline/document_stream.h include/simdjson/implementation.h include/simdjson/parsedjson.h include/simdjson/jsonstream.h include/simdjson/inline/jsonstream.h include/simdjson/portability.h include/simdjson/error.h include/simdjson/inline/error.h include/simdjson/simdjson.h include/simdjson/simdjson_version.h
64+
INCLUDEHEADERS=include/simdjson.h include/simdjson/common_defs.h include/simdjson/internal/jsonformatutils.h include/simdjson/jsonioutil.h include/simdjson/jsonparser.h include/simdjson/padded_string.h include/simdjson/inline/padded_string.h include/simdjson/document.h include/simdjson/inline/document.h include/simdjson/document_iterator.h include/simdjson/inline/document_iterator.h include/simdjson/document_stream.h include/simdjson/inline/document_stream.h include/simdjson/implementation.h include/simdjson/parsedjson.h include/simdjson/jsonstream.h include/simdjson/inline/jsonstream.h include/simdjson/portability.h include/simdjson/error.h include/simdjson/inline/error.h include/simdjson/simdjson.h include/simdjson/simdjson_version.h
6565

6666
ifeq ($(SIMDJSON_TEST_AMALGAMATED_HEADERS),1)
6767
HEADERS=singleheader/simdjson.h

amalgamation.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ int main(int argc, char *argv[]) {
143143
// parse_many
144144
const char * filename2 = argv[2];
145145
for (auto result : parser.load_many(filename2)) {
146-
error = result.error;
146+
error = result.error();
147147
}
148148
if (error) {
149149
std::cout << "parse_many failed" << std::endl;

benchmark/minifiercompetition.cpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -98,16 +98,14 @@ int main(int argc, char *argv[]) {
9898
"despacing with RapidJSON Insitu", rapid_stringme_insitu((char *)buffer),
9999
memcpy(buffer, p.data(), p.size()), repeat, volume, !just_data);
100100
memcpy(buffer, p.data(), p.size());
101-
102-
size_t outlength = simdjson::json_minify((const uint8_t *)buffer, p.size(),
103-
(uint8_t *)buffer);
104-
if (verbose)
105-
std::cout << "json_minify length is " << outlength << std::endl;
106-
101+
size_t outlength;
107102
uint8_t *cbuffer = (uint8_t *)buffer;
108-
BEST_TIME("json_minify", simdjson::json_minify(cbuffer, p.size(), cbuffer),
103+
for (auto imple : simdjson::available_implementations) {
104+
BEST_TIME((std::string("simdjson->minify+")+imple->name()).c_str(), (imple->minify(cbuffer, p.size(), cbuffer, outlength), outlength),
109105
outlength, memcpy(buffer, p.data(), p.size()), repeat, volume,
110106
!just_data);
107+
}
108+
111109
printf("minisize = %zu, original size = %zu (minified down to %.2f percent "
112110
"of original) \n",
113111
outlength, p.size(), outlength * 100.0 / p.size());
@@ -121,8 +119,9 @@ int main(int argc, char *argv[]) {
121119
!just_data);
122120

123121
char *mini_buffer = simdjson::internal::allocate_padded_buffer(p.size() + 1);
124-
size_t minisize = simdjson::json_minify((const uint8_t *)p.data(), p.size(),
125-
(uint8_t *)mini_buffer);
122+
size_t minisize;
123+
simdjson::active_implementation->minify((const uint8_t *)p.data(), p.size(),
124+
(uint8_t *)mini_buffer, minisize);
126125
mini_buffer[minisize] = '\0';
127126

128127
BEST_TIME("RapidJSON Insitu despaced", d.ParseInsitu(buffer).HasParseError(),
@@ -171,6 +170,7 @@ int main(int argc, char *argv[]) {
171170
automated_reallocation),
172171
simdjson::SUCCESS, memcpy(buffer, mini_buffer, p.size()), repeat, volume,
173172
!just_data);
173+
174174
free(buffer);
175175
free(ast_buffer);
176176
free(mini_buffer);

benchmark/parse.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,12 @@ struct option_struct {
109109
case 'a': {
110110
const implementation *impl = simdjson::available_implementations[optarg];
111111
if (!impl) {
112-
exit_usage(string("Unsupported option value -a ") + optarg + ": expected -a haswell, westmere or arm64");
112+
std::string exit_message = string("Unsupported option value -a ") + optarg + ": expected -a with one of ";
113+
for (auto imple : simdjson::available_implementations) {
114+
exit_message += imple->name();
115+
exit_message += " ";
116+
}
117+
exit_usage(exit_message);
113118
}
114119
simdjson::active_implementation = impl;
115120
break;

include/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ set(SIMDJSON_INCLUDE
1616
${SIMDJSON_INCLUDE_DIR}/simdjson/inline/padded_string.h
1717
${SIMDJSON_INCLUDE_DIR}/simdjson/internal/jsonformatutils.h
1818
${SIMDJSON_INCLUDE_DIR}/simdjson/jsonioutil.h
19-
${SIMDJSON_INCLUDE_DIR}/simdjson/jsonminifier.h
2019
${SIMDJSON_INCLUDE_DIR}/simdjson/jsonparser.h
2120
${SIMDJSON_INCLUDE_DIR}/simdjson/jsonstream.h
2221
${SIMDJSON_INCLUDE_DIR}/simdjson/padded_string.h

include/simdjson.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
#include "simdjson/implementation.h"
1111
#include "simdjson/document.h"
1212
#include "simdjson/document_stream.h"
13-
#include "simdjson/jsonminifier.h"
1413

1514
// Deprecated API
1615
#include "simdjson/parsedjsoniterator.h"

include/simdjson/error.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,17 @@ struct simdjson_error : public std::exception {
7777
*/
7878
template<typename T>
7979
struct simdjson_result : public std::pair<T, error_code> {
80+
/**
81+
* Move the value and the error to the provided variables.
82+
*/
83+
void tie(T& t, error_code & e) {
84+
// on the clang compiler that comes with current macOS (Apple clang version 11.0.0),
85+
// tie(width, error) = size["w"].as_uint64_t();
86+
// fails with "error: no viable overloaded '='""
87+
t = std::move(this->first);
88+
e = std::move(this->second);
89+
}
90+
8091
/**
8192
* The error.
8293
*/
@@ -138,6 +149,7 @@ struct simdjson_move_result : std::pair<T, error_code> {
138149
t = std::move(this->first);
139150
e = std::move(this->second);
140151
}
152+
141153
/**
142154
* The error.
143155
*/

include/simdjson/implementation.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,19 @@ class implementation {
5656
*/
5757
WARN_UNUSED virtual error_code parse(const uint8_t *buf, size_t len, document::parser &parser) const noexcept = 0;
5858

59+
/**
60+
* Run a full document parse (ensure_capacity, stage1 and stage2).
61+
*
62+
* Overridden by each implementation.
63+
*
64+
* @param buf the json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes.
65+
* @param len the length of the json document.
66+
* @param dst the buffer to write the minified document to. *MUST* be allocated up to len + SIMDJSON_PADDING bytes.
67+
* @param dst_len the number of bytes written. 179B Output only.
68+
* @return the error code, or SUCCESS if there was no error.
69+
*/
70+
WARN_UNUSED virtual error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept = 0;
71+
5972
/**
6073
* Stage 1 of the document parser.
6174
*
@@ -182,6 +195,9 @@ class detect_best_supported_implementation_on_first_use final : public implement
182195
WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, document::parser &parser) const noexcept final {
183196
return set_best()->parse(buf, len, parser);
184197
}
198+
WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final {
199+
return set_best()->minify(buf, len, dst, dst_len);
200+
}
185201
WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, document::parser &parser, bool streaming) const noexcept final {
186202
return set_best()->stage1(buf, len, parser, streaming);
187203
}

include/simdjson/jsonminifier.h

Lines changed: 0 additions & 32 deletions
This file was deleted.

src/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ set(SIMDJSON_SRC
2929
set(SIMDJSON_SRC_HEADERS
3030
implementation.cpp
3131
isadetection.h
32-
jsonminifier.cpp
3332
simdprune_tables.h
3433
stage1_find_marks.cpp
3534
stage2_build_tape.cpp

src/arm64/bitmanipulation.h

Lines changed: 1 addition & 1 deletion
10000
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ really_inline int leading_zeroes(uint64_t input_num) {
4848
}
4949

5050
/* result might be undefined when input_num is zero */
51-
really_inline int hamming(uint64_t input_num) {
51+
really_inline int count_ones(uint64_t input_num) {
5252
return vaddv_u8(vcnt_u8((uint8x8_t)input_num));
5353
}
5454

src/arm64/implementation.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ class implementation final : public simdjson::implementation {
1010
public:
1111
really_inline implementation() : simdjson::implementation("arm64", "ARM NEON", instruction_set::NEON) {}
1212
WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, document::parser &parser) const noexcept final;
13+
WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final;
1314
WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, document::parser &parser, bool streaming) const noexcept final;
1415
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, document::parser &parser) const noexcept final;
1516
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, document::parser &parser, size_t &next_json) const noexcept final;

src/arm64/simd.h

Lines changed: 46 additions & 1 deletion
+
// we do it in two steps, first 8 bytes and then second 8 bytes
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
#define SIMDJSON_ARM64_SIMD_H
33

44
#include "simdjson.h"
5+
#include "simdprune_tables.h"
6+
#include "arm64/bitmanipulation.h"
57
#include "arm64/intrinsics.h"
68

79
namespace simdjson::arm64::simd {
@@ -142,6 +144,43 @@ namespace simdjson::arm64::simd {
142144
really_inline simd8<L> lookup_16(simd8<L> lookup_table) const {
143145
return lookup_table.apply_lookup_16_to(*this);
144146
}
147+
148+
149+
// Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset).
150+
// Passing a 0 value for mask would be equivalent to writing out every byte to output.
151+
// Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes
152+
// get written.
153+
// Design consideration: it seems like a function with the
154+
// signature simd8<L> compress(uint16_t mask) would be
155+
// sensible, but the AVX ISA makes this kind of approach difficult.
156+
template<typename L>
157+
really_inline void compress(uint16_t mask, L * output) const {
158+
// this particular implementation was inspired by work done by @animetosho
159
160+
uint8_t mask1 = static_cast<uint8_t>(mask); // least significant 8 bits
161+
uint8_t mask2 = static_cast<uint8_t>(mask >> 8); // most significant 8 bits
162+
// next line just loads the 64-bit values thintable_epi8[mask1] and
163+
// thintable_epi8[mask2] into a 128-bit register, using only
164+
// two instructions on most compilers.
165+
uint64x2_t shufmask64 = {thintable_epi8[mask1], thintable_epi8[mask2]};
166+
uint8x16_t shufmask = vreinterpretq_u8_u64(shufmask64);
167+
// we increment by 0x08 the second half of the mask
168+
uint8x16_t inc = {0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08};
169+
shufmask = vaddq_u8(shufmask, inc);
170+
// this is the version "nearly pruned"
171+
uint8x16_t pruned = vqtbl1q_u8(*this, shufmask);
172+
// we still need to put the two halves together.
173+
// we compute the popcount of the first half:
174+
int pop1 = BitsSetTable256mul2[mask1];
175+
// then load the corresponding mask, what it does is to write
176+
// only the first pop1 bytes from the first 8 bytes, and then
177+
// it fills in with the bytes from the second 8 bytes + some filling
178+
// at the end.
179+
uint8x16_t compactmask = vld1q_u8((const uint8_t *)(pshufb_combine_table + pop1 * 8));
180+
uint8x16_t answer = vqtbl1q_u8(pruned, compactmask);
181+
vst1q_u8((uint8_t*) output, answer);
182+
}
183+
145184
template<typename L>
146185
really_inline simd8<L> lookup_16(
147186
L replace0, L replace1, L replace2, L replace3,
@@ -267,6 +306,13 @@ namespace simdjson::arm64::simd {
267306
this->chunks[3].store(ptr+sizeof(simd8<T>)*3);
268307
}
269308

309+
really_inline void compress(uint64_t mask, T * output) const {
310+
this->chunks[0].compress(mask, output);
311+
this->chunks[1].compress(mask >> 16, output + 16 - count_ones(mask & 0xFFFF));
312+
this->chunks[2].compress(mask >> 32, output + 32 - count_ones(mask & 0xFFFFFFFF));
313+
this->chunks[3].compress(mask >> 48, output + 48 - count_ones(mask & 0xFFFFFFFFFFFF));
314+
}
315+
270316
template <typename F>
271317
static really_inline void each_index(F const& each) {
272318
each(0);
@@ -339,7 +385,6 @@ namespace simdjson::arm64::simd {
339385
const simd8<T> mask = simd8<T>::splat(m);
340386
return this->map( [&](auto a) { return a <= mask; } ).to_bitmask();
341387
}
342-
343388
}; // struct simd8x64<T>
344389

345390
} // namespace simdjson::arm64::simd

src/arm64/stage1_find_marks.h

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,23 @@ really_inline json_character_block json_character_block::classify(const simd::si
3131
return shuf_lo & shuf_hi;
3232
});
3333

34+
35+
// We compute whitespace and op separately. If the code later only use one or the
36+
// other, given the fact that all functions are aggressively inlined, we can
37+
// hope that useless computations will be omitted. This is namely case when
38+
// minifying (we only need whitespace). *However* if we only need spaces,
39+
// it is likely that we will still compute 'v' above with two lookup_16: one
40+
// could do it a bit cheaper. This is in contrast with the x64 implementations
41+
// where we can, efficiently, do the white space and structural matching
42+
// separately. One reason for this difference is that on ARM NEON, the table
43+
// lookups either zero or leave unchanged the characters exceeding 0xF whereas
44+
// on x64, the equivalent instruction (pshufb) automatically applies a mask,
45+
// ignoring the 4 most significant bits. Thus the x64 implementation is
46+
// optimized differently. This being said, if you use this code strictly
47+
// just for minification (or just to identify the structural characters),
48+
// there is a small untaken optimization opportunity here. We deliberately
49+
// do not pick it up.
50+
3451
uint64_t op = v.map([&](simd8<uint8_t> _v) { return _v.any_bits_set(0x7); }).to_bitmask();
3552
uint64_t whitespace = v.map([&](simd8<uint8_t> _v) { return _v.any_bits_set(0x18); }).to_bitmask();
3653
return { whitespace, op };
@@ -53,11 +70,17 @@ really_inline simd8<bool> must_be_continuation(simd8<uint8_t> prev1, simd8<uint8
5370
return is_second_byte ^ is_third_byte ^ is_fourth_byte;
5471
}
5572

56-
#include "generic/utf8_lookup2_algorithm.h"
73+
#include "generic/buf_block_reader.h"
5774
#include "generic/json_string_scanner.h"
5875
#include "generic/json_scanner.h"
59-
#include "generic/json_structural_indexer.h"
6076

77+
#include "generic/json_minifier.h"
78+
WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept {
79+
return arm64::stage1::json_minifier::minify<64>(buf, len, dst, dst_len);
80+
}
81+
82+
#include "generic/utf8_lookup2_algorithm.h"
83+
#include "generic/json_structural_indexer.h"
6184
WARN_UNUSED error_code implementation::stage1(const uint8_t *buf, size_t len, document::parser &parser, bool streaming) const noexcept {
6285
return arm64::stage1::json_structural_indexer::index<64>(buf, len, parser, streaming);
6386
}

src/fallback/implementation.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ class implementation final : public simdjson::implementation {
1414
0
1515
) {}
1616
WARN_UNUSED error_code parse(const uint8_t *buf, size_t len, document::parser &parser) const noexcept final;
17+
WARN_UNUSED error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final;
1718
WARN_UNUSED error_code stage1(const uint8_t *buf, size_t len, document::parser &parser, bool streaming) const noexcept final;
1819
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, document::parser &parser) const noexcept final;
1920
WARN_UNUSED error_code stage2(const uint8_t *buf, size_t len, document::parser &parser, size_t &next_json) const noexcept final;

0 commit comments

Comments
 (0)
0