1
1
#include < iostream>
2
+ #include < set>
2
3
3
4
#include " simdjson.h"
4
5
@@ -25,6 +26,7 @@ struct stat_s {
25
26
size_t unsigned_integer_count;
26
27
size_t float_count;
27
28
size_t string_count;
29
+ size_t string_byte_count;
28
30
size_t backslash_count;
29
31
size_t non_ascii_byte_count;
30
32
size_t object_count;
@@ -42,7 +44,11 @@ struct stat_s {
42
44
size_t maximum_object_size;
43
45
size_t maximum_array_size;
44
46
size_t string_maximum_length;
47
+ size_t repeated_key_byte_count;
48
+
45
49
bool valid;
50
+ std::set<std::string_view> all_keys;
51
+ std::set<std::string_view> repeated_keys;
46
52
};
47
53
48
54
// Alias so the statistics record declared as `struct stat_s` can be
// referred to as `stat_t` (e.g. the `stat_t &s` parameter of recurse()).
using stat_t = struct stat_s ;
@@ -80,6 +86,12 @@ void recurse(simdjson::dom::element element, stat_t &s, size_t depth) {
80
86
size_t counter = 0 ;
81
87
for (auto [key, value] : object) {
82
88
counter++;
89
+ if (s.all_keys .find (key) != s.all_keys .end ()) {
90
+ s.repeated_keys .insert (key);
91
+ s.repeated_key_byte_count += key.size ();
92
+ } else {
93
+ s.all_keys .insert (key);
94
+ }
83
95
if (is_ascii (key)) {
84
96
s.ascii_key_count ++;
85
97
s.ascii_string_count ++;
@@ -91,6 +103,7 @@ void recurse(simdjson::dom::element element, stat_t &s, size_t depth) {
91
103
s.string_maximum_length = key.size ();
92
104
}
93
105
s.string_count ++;
106
+ s.string_byte_count += key.size ();
94
107
s.key_count ++;
95
108
recurse (value, s, depth + 1 );
96
109
}
@@ -133,10 +146,9 @@ void recurse(simdjson::dom::element element, stat_t &s, size_t depth) {
133
146
if (is_ascii (v)) {
134
147
s.ascii_string_count ++;
135
148
}
136
- std::string_view strval;
137
- element.get <std::string_view>().tie (strval, error);
138
- if (strval.size () > s.string_maximum_length ) {
139
- s.string_maximum_length = strval.size ();
149
+ s.string_byte_count += v.size ();
150
+ if (v.size () > s.string_maximum_length ) {
151
+ s.string_maximum_length = v.size ();
140
152
}
141
153
} else {
142
154
std::cerr << " unrecognized node." << std::endl;
@@ -200,6 +212,7 @@ int main(int argc, char *argv[]) {
200
212
"unsigned_integer_count" = %10zu,
201
213
"float_count" = %10zu,
202
214
"string_count" = %10zu,
215
+ "string_byte_count" = %10zu,
203
216
"ascii_string_count" = %10zu,
204
217
"string_maximum_length" = %10zu,
205
218
"backslash_count" = %10zu,
@@ -216,15 +229,19 @@ int main(int argc, char *argv[]) {
216
229
"key_count" = %10zu,
217
230
"ascii_key_count" = %10zu,
218
231
"key_maximum_length" = %10zu,
232
+ "key_distinct_count" = %10zu,
233
+ "repeated_key_distinct_count"= %10zu,
234
+ "repeated_key_byte_count" = %10zu,
219
235
"maximum_depth" = %10zu
220
236
}
221
237
)" ,
222
238
s.integer_count ,s.integer32_count ,s.unsigned_integer32_count ,s.unsigned_integer_count ,
223
- s.float_count , s.string_count , s.ascii_string_count ,
239
+ s.float_count , s.string_count , s.string_byte_count , s. ascii_string_count ,
224
240
s.string_maximum_length , s.backslash_count , s.non_ascii_byte_count ,
225
241
s.object_count , s.maximum_object_size , s.array_count ,
226
242
s.maximum_array_size , s.null_count , s.true_count , s.false_count ,
227
243
s.byte_count , s.structural_indexes_count , s.key_count ,
228
- s.ascii_key_count , s.key_maximum_length , s.maximum_depth );
244
+ s.ascii_key_count , s.key_maximum_length , s.all_keys .size (), s.repeated_keys .size (),
245
+ s.repeated_key_byte_count , s.maximum_depth );
229
246
return EXIT_SUCCESS;
230
247
}
0 commit comments