Merge structural_parser+iterator into json_iterator · JavaScriptExpert/simdjson@658f166 · GitHub
[go: up one dir, main page]

Skip to content

Commit 658f166

Browse files
committed
Merge structural_parser+iterator into json_iterator
1 parent 61b51d5 commit 658f166

File tree

4 files changed

+65
-99
lines changed

4 files changed

+65
-99
lines changed

src/generic/stage2/structural_parser.h renamed to src/generic/stage2/json_iterator.h

Lines changed: 31 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,41 @@
1-
// This file contains the common code every implementation uses for stage2
2-
// It is intended to be included multiple times and compiled multiple times
3-
// We assume the file in which it is include already includes
4-
// "simdjson/stage2.h" (this simplifies amalgation)
5-
61
#include "generic/stage2/logger.h"
7-
#include "generic/stage2/structural_iterator.h"
82

9-
namespace { // Make everything here private
3+
namespace {
104
namespace SIMDJSON_IMPLEMENTATION {
115
namespace stage2 {
126

13-
#define SIMDJSON_TRY(EXPR) { auto _err = (EXPR); if (_err) { return _err; } }
14-
15-
struct structural_parser : structural_iterator {
7+
class json_iterator {
8+
public:
9+
const uint8_t* const buf;
10+
uint32_t *next_structural;
11+
dom_parser_implementation &dom_parser;
1612

1713
template<bool STREAMING, typename T>
1814
WARN_UNUSED really_inline error_code walk_document(T &visitor) noexcept;
1915

20-
// For non-streaming, to pass an explicit 0 as next_structural, which enables optimizations
21-
really_inline structural_parser(dom_parser_implementation &_dom_parser, uint32_t start_structural_index)
22-
: structural_iterator(_dom_parser, start_structural_index) {
16+
// Start a structural
17+
really_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index)
18+
: buf{_dom_parser.buf},
19+
next_structural{&_dom_parser.structural_indexes[start_structural_index]},
20+
dom_parser{_dom_parser} {
21+
}
22+
23+
// Get the buffer position of the current structural character
24+
really_inline const uint8_t* advance() {
25+
return &buf[*(next_structural++)];
26+
}
27+
really_inline char advance_char() {
28+
return buf[*(next_structural++)];
29+
}
30+
really_inline size_t remaining_len() {
31+
return dom_parser.len - *(next_structural-1);
32+
}
33+
34+
really_inline bool at_end() {
35+
return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes];
36+
}
37+
really_inline bool at_beginning() {
38+
return next_structural == dom_parser.structural_indexes.get();
2339
}
2440

2541
really_inline void log_value(const char *type) {
@@ -39,10 +55,10 @@ struct structural_parser : structural_iterator {
3955
really_inline void log_error(const char *error) {
4056
logger::log_line(*this, "", "ERROR", error);
4157
}
42-
}; // struct structural_parser
58+
};
4359

4460
template<bool STREAMING, typename T>
45-
WARN_UNUSED really_inline error_code structural_parser::walk_document(T &visitor) noexcept {
61+
WARN_UNUSED really_inline error_code json_iterator::walk_document(T &visitor) noexcept {
4662
logger::log_start();
4763

4864
const uint8_t *value; // Used to keep a value around between states

src/generic/stage2/structural_iterator.h

Lines changed: 0 additions & 52 deletions
This file was deleted.

src/generic/stage2/tape_builder.h

Lines changed: 32 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#include "generic/stage2/structural_parser.h"
1+
#include "generic/stage2/json_iterator.h"
22
#include "generic/stage2/tape_writer.h"
33
#include "generic/stage2/atomparsing.h"
44

@@ -12,12 +12,12 @@ struct tape_builder {
1212
dom_parser_implementation &dom_parser,
1313
dom::document &doc) noexcept {
1414
dom_parser.doc = &doc;
15-
structural_parser iter(dom_parser, STREAMING ? dom_parser.next_structural_index : 0);
15+
json_iterator iter(dom_parser, STREAMING ? dom_parser.next_structural_index : 0);
1616
tape_builder builder(doc);
1717
return iter.walk_document<STREAMING>(builder);
1818
}
1919

20-
really_inline error_code root_primitive(structural_parser &iter, const uint8_t *value) {
20+
really_inline error_code root_primitive(json_iterator &iter, const uint8_t *value) {
2121
switch (*value) {
2222
case '"': return parse_string(iter, value);
2323
case 't': return parse_root_true_atom(iter, value);
@@ -32,7 +32,7 @@ struct tape_builder {
3232
return TAPE_ERROR;
3333
}
3434
}
35-
really_inline error_code primitive(structural_parser &iter, const uint8_t *value) {
35+
really_inline error_code primitive(json_iterator &iter, const uint8_t *value) {
3636
switch (*value) {
3737
case '"': return parse_string(iter, value);
3838
case 't': return parse_true_atom(iter, value);
@@ -47,29 +47,29 @@ struct tape_builder {
4747
return TAPE_ERROR;
4848
}
4949
}
50-
really_inline void empty_object(structural_parser &iter) {
50+
really_inline void empty_object(json_iterator &iter) {
5151
iter.log_value("empty object");
5252
empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT);
5353
}
54-
really_inline void empty_array(structural_parser &iter) {
54+
really_inline void empty_array(json_iterator &iter) {
5555
iter.log_value("empty array");
5656
empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY);
5757
}
5858

59-
really_inline void start_document(structural_parser &iter) {
59+
really_inline void start_document(json_iterator &iter) {
6060
iter.log_start_value("document");
6161
start_container(iter);
6262
iter.dom_parser.is_array[depth] = false;
6363
}
64-
WARN_UNUSED really_inline error_code start_object(structural_parser &iter) {
64+
WARN_UNUSED really_inline error_code start_object(json_iterator &iter) {
6565
iter.log_start_value("object");
6666
depth++;
6767
if (depth >= iter.dom_parser.max_depth()) { iter.log_error("Exceeded max depth!"); return DEPTH_ERROR; }
6868
start_container(iter);
6969
iter.dom_parser.is_array[depth] = false;
7070
return SUCCESS;
7171
}
72-
WARN_UNUSED really_inline error_code start_array(structural_parser &iter) {
72+
WARN_UNUSED really_inline error_code start_array(json_iterator &iter) {
7373
iter.log_start_value("array");
7474
depth++;
7575
if (depth >= iter.dom_parser.max_depth()) { iter.log_error("Exceeded max depth!"); return DEPTH_ERROR; }
@@ -78,15 +78,15 @@ struct tape_builder {
7878
return SUCCESS;
7979
}
8080

81-
really_inline void end_object(structural_parser &iter) {
81+
really_inline void end_object(json_iterator &iter) {
8282
iter.log_end_value("object");
8383
end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT);
8484
}
85-
really_inline void end_array(structural_parser &iter) {
85+
really_inline void end_array(json_iterator &iter) {
8686
iter.log_end_value("array");
8787
end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY);
8888
}
89-
WARN_UNUSED really_inline error_code end_document(structural_parser &iter) {
89+
WARN_UNUSED really_inline error_code end_document(json_iterator &iter) {
9090
iter.log_end_value("document");
9191
constexpr uint32_t start_tape_index = 0;
9292
tape.append(start_tape_index, internal::tape_type::ROOT);
@@ -98,7 +98,7 @@ struct tape_builder {
9898
}
9999
return SUCCESS;
100100
}
101-
WARN_UNUSED really_inline error_code key(structural_parser &iter, const uint8_t *value) {
101+
WARN_UNUSED really_inline error_code key(json_iterator &iter, const uint8_t *value) {
102102
return parse_string(iter, value, true);
103103
}
104104

@@ -108,21 +108,21 @@ struct tape_builder {
108108
// The object returned from end_container() should support the in_container(),
109109
// in_array() and in_object() methods, allowing the iterator to branch to the
110110
// correct place.
111-
really_inline tape_builder &end_container(structural_parser &) {
111+
really_inline tape_builder &end_container(json_iterator &) {
112112
depth--;
113113
return *this;
114114
}
115115
// increment_count increments the count of keys in an object or values in an array.
116-
really_inline void increment_count(structural_parser &iter) {
116+
really_inline void increment_count(json_iterator &iter) {
117117
iter.dom_parser.open_containers[depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1
118118
}
119-
really_inline bool in_container(structural_parser &) {
119+
really_inline bool in_container(json_iterator &) {
120120
return depth != 0;
121121
}
122-
really_inline bool in_array(structural_parser &iter) {
122+
really_inline bool in_array(json_iterator &iter) {
123123
return iter.dom_parser.is_array[depth];
124124
}
125-
really_inline bool in_object(structural_parser &iter) {
125+
really_inline bool in_object(json_iterator &iter) {
126126
return !iter.dom_parser.is_array[depth];
127127
}
128128

@@ -136,7 +136,7 @@ struct tape_builder {
136136

137137
really_inline tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {}
138138

139-
WARN_UNUSED really_inline error_code parse_string(structural_parser &iter, const uint8_t *value, bool key = false) {
139+
WARN_UNUSED really_inline error_code parse_string(json_iterator &iter, const uint8_t *value, bool key = false) {
140140
iter.log_value(key ? "key" : "string");
141141
uint8_t *dst = on_start_string(iter);
142142
dst = stringparsing::parse_string(value, dst);
@@ -148,13 +148,13 @@ struct tape_builder {
148148
return SUCCESS;
149149
}
150150

151-
WARN_UNUSED really_inline error_code parse_number(structural_parser &iter, const uint8_t *value) {
151+
WARN_UNUSED really_inline error_code parse_number(json_iterator &iter, const uint8_t *value) {
152152
iter.log_value("number");
153153
if (!numberparsing::parse_number(value, tape)) { iter.log_error("Invalid number"); return NUMBER_ERROR; }
154154
return SUCCESS;
155155
}
156156

157-
really_inline error_code parse_root_number(structural_parser &iter, const uint8_t *value) {
157+
really_inline error_code parse_root_number(json_iterator &iter, const uint8_t *value) {
158158
//
159159
// We need to make a copy to make sure that the string is space terminated.
160160
// This is not about padding the input, which should already padded up
@@ -179,42 +179,42 @@ struct tape_builder {
179179
return error;
180180
}
181181

182-
WARN_UNUSED really_inline error_code parse_true_atom(structural_parser &iter, const uint8_t *value) {
182+
WARN_UNUSED really_inline error_code parse_true_atom(json_iterator &iter, const uint8_t *value) {
183183
iter.log_value("true");
184184
if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; }
185185
tape.append(0, internal::tape_type::TRUE_VALUE);
186186
return SUCCESS;
187187
}
188188

189-
WARN_UNUSED really_inline error_code parse_root_true_atom(structural_parser &iter, const uint8_t *value) {
189+
WARN_UNUSED really_inline error_code parse_root_true_atom(json_iterator &iter, const uint8_t *value) {
190190
iter.log_value("true");
191191
if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; }
192192
tape.append(0, internal::tape_type::TRUE_VALUE);
193193
return SUCCESS;
194194
}
195195

196-
WARN_UNUSED really_inline error_code parse_false_atom(structural_parser &iter, const uint8_t *value) {
196+
WARN_UNUSED really_inline error_code parse_false_atom(json_iterator &iter, const uint8_t *value) {
197197
iter.log_value("false");
198198
if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; }
199199
tape.append(0, internal::tape_type::FALSE_VALUE);
200200
return SUCCESS;
201201
}
202202

203-
WARN_UNUSED really_inline error_code parse_root_false_atom(structural_parser &iter, const uint8_t *value) {
203+
WARN_UNUSED really_inline error_code parse_root_false_atom(json_iterator &iter, const uint8_t *value) {
204204
iter.log_value("false");
205205
if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; }
206206
tape.append(0, internal::tape_type::FALSE_VALUE);
207207
return SUCCESS;
208208
}
209209

210-
WARN_UNUSED really_inline error_code parse_null_atom(structural_parser &iter, const uint8_t *value) {
210+
WARN_UNUSED really_inline error_code parse_null_atom(json_iterator &iter, const uint8_t *value) {
211211
iter.log_value("null");
212212
if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; }
213213
tape.append(0, internal::tape_type::NULL_VALUE);
214214
return SUCCESS;
215215
}
216216

217-
WARN_UNUSED really_inline error_code parse_root_null_atom(structural_parser &iter, const uint8_t *value) {
217+
WARN_UNUSED really_inline error_code parse_root_null_atom(json_iterator &iter, const uint8_t *value) {
218218
iter.log_value("null");
219219
if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; }
220220
tape.append(0, internal::tape_type::NULL_VALUE);
@@ -223,23 +223,23 @@ struct tape_builder {
223223

224224
// private:
225225

226-
really_inline uint32_t next_tape_index(structural_parser &iter) {
226+
really_inline uint32_t next_tape_index(json_iterator &iter) {
227227
return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get());
228228
}
229229

230-
really_inline void empty_container(structural_parser &iter, internal::tape_type start, internal::tape_type end) {
230+
really_inline void empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) {
231231
auto start_index = next_tape_index(iter);
232232
tape.append(start_index+2, start);
233233
tape.append(start_index, end);
234234
}
235235

236-
really_inline void start_container(structural_parser &iter) {
236+
really_inline void start_container(json_iterator &iter) {
237237
iter.dom_parser.open_containers[depth].tape_index = next_tape_index(iter);
238238
iter.dom_parser.open_containers[depth].count = 0;
239239
tape.skip(); // We don't actually *write* the start element until the end.
240240
}
241241

242-
really_inline void end_container(structural_parser &iter, internal::tape_type start, internal::tape_type end) noexcept {
242+
really_inline void end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept {
243243
// Write the ending tape element, pointing at the start location
244244
const uint32_t start_tape_index = iter.dom_parser.open_containers[depth].tape_index;
245245
tape.append(start_tape_index, end);
@@ -251,7 +251,7 @@ struct tape_builder {
251251
tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start);
252252
}
253253

254-
really_inline uint8_t *on_start_string(structural_parser &iter) noexcept {
254+
really_inline uint8_t *on_start_string(json_iterator &iter) noexcept {
255255
// we advance the point, accounting for the fact that we have a NULL termination
256256
tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING);
257257
return current_string_buf_loc + sizeof(uint32_t);

src/implementation.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
#include <initializer_list>
66

7+
#define SIMDJSON_TRY(EXPR) { auto _err = (EXPR); if (_err) { return _err; } }
8+
79
// Static array of known implementations. We're hoping these get baked into the executable
810
// without requiring a static initializer.
911

0 commit comments

Comments
 (0)
0