The jsonstats utility becomes ever more powerful. (#890) · JavaScriptExpert/simdjson@f346362 · GitHub
[go: up one dir, main page]

Skip to content

Commit f346362

Browse files
authored
The jsonstats utility becomes ever more powerful. (simdjson#890)
1 parent 8927a05 commit f346362

File tree

1 file changed

+23
-6
lines changed

1 file changed

+23
-6
lines changed

tools/jsonstats.cpp

Lines changed: 23 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
#include <iostream>
2+
#include <set>
23

34
#include "simdjson.h"
45

@@ -25,6 +26,7 @@ struct stat_s {
2526
size_t unsigned_integer_count;
2627
size_t float_count;
2728
size_t string_count;
29+
size_t string_byte_count;
2830
size_t backslash_count;
2931
size_t non_ascii_byte_count;
3032
size_t object_count;
@@ -42,7 +44,11 @@ struct stat_s {
4244
size_t maximum_object_size;
4345
size_t maximum_array_size;
4446
size_t string_maximum_length;
47+
size_t repeated_key_byte_count;
48+
4549
bool valid;
50+
std::set<std::string_view> all_keys;
51+
std::set<std::string_view> repeated_keys;
4652
};
4753

4854
using stat_t = struct stat_s;
@@ -80,6 +86,12 @@ void recurse(simdjson::dom::element element, stat_t &s, size_t depth) {
8086
size_t counter = 0;
8187
for (auto [key, value] : object) {
8288
counter++;
89+
if(s.all_keys.find(key) != s.all_keys.end()) {
90+
s.repeated_keys.insert(key);
91+
s.repeated_key_byte_count += key.size();
92+
} else {
93+
s.all_keys.insert(key);
94+
}
8395
if (is_ascii(key)) {
8496
s.ascii_key_count++;
8597
s.ascii_string_count++;
@@ -91,6 +103,7 @@ void recurse(simdjson::dom::element element, stat_t &s, size_t depth) {
91103
s.string_maximum_length = key.size();
92104
}
93105
s.string_count++;
106+
s.string_byte_count+= key.size();
94107
s.key_count++;
95108
recurse(value, s, depth + 1);
96109
}
@@ -133,10 +146,9 @@ void recurse(simdjson::dom::element element, stat_t &s, size_t depth) {
133146
if (is_ascii(v)) {
134147
s.ascii_string_count++;
135148
}
136-
std::string_view strval;
137-
element.get<std::string_view>().tie(strval, error);
138-
if (strval.size() > s.string_maximum_length) {
139-
s.string_maximum_length = strval.size();
149+
s.string_byte_count+= v.size();
150+
if (v.size() > s.string_maximum_length) {
151+
s.string_maximum_length = v.size();
140152
}
141153
} else {
142154
std::cerr << "unrecognized node." << std::endl;
@@ -200,6 +212,7 @@ int main(int argc, char *argv[]) {
200212
"unsigned_integer_count" = %10zu,
201213
"float_count" = %10zu,
202214
"string_count" = %10zu,
215+
"string_byte_count" = %10zu,
203216
"ascii_string_count" = %10zu,
204217
"string_maximum_length" = %10zu,
205218
"backslash_count" = %10zu,
@@ -216,15 +229,19 @@ int main(int argc, char *argv[]) {
216229
"key_count" = %10zu,
217230
"ascii_key_count" = %10zu,
218231
"key_maximum_length" = %10zu,
232+
"key_distinct_count" = %10zu,
233+
"repeated_key_distinct_count"= %10zu,
234+
"repeated_key_byte_count" = %10zu;
219235
"maximum_depth" = %10zu
220236
}
221237
)",
222238
s.integer_count,s.integer32_count,s.unsigned_integer32_count,s.unsigned_integer_count,
223-
s.float_count, s.string_count, s.ascii_string_count,
239+
s.float_count, s.string_count, s.string_byte_count, s.ascii_string_count,
224240
s.string_maximum_length, s.backslash_count, s.non_ascii_byte_count,
225241
s.object_count, s.maximum_object_size, s.array_count,
226242
s.maximum_array_size, s.null_count, s.true_count, s.false_count,
227243
s.byte_count, s.structural_indexes_count, s.key_count,
228-
s.ascii_key_count, s.key_maximum_length, s.maximum_depth);
244+
s.ascii_key_count, s.key_maximum_length, s.all_keys.size(), s.repeated_keys.size(),
245+
s.repeated_key_byte_count, s.maximum_depth);
229246
return EXIT_SUCCESS;
230247
}

0 commit comments

Comments
 (0)
0