1
1
/*
2
- * This file is part of the Micro Python project, http://micropython.org/
2
+ * This file is part of the MicroPython project, http://micropython.org/
3
3
*
4
4
* The MIT License (MIT)
5
5
*
6
- * Copyright (c) 2014 Damien P. George
6
+ * Copyright (c) 2014-2016 Damien P. George
7
7
*
8
8
* Permission is hereby granted, free of charge, to any person obtaining a copy
9
9
* of this software and associated documentation files (the "Software"), to deal
28
28
29
29
#include "py/nlr.h"
30
30
#include "py/objlist.h"
31
+ #include "py/objstringio.h"
31
32
#include "py/parsenum.h"
32
33
#include "py/runtime.h"
34
+ #include "py/stream.h"
33
35
34
36
#if MICROPY_PY_UJSON
35
37
@@ -42,7 +44,7 @@ STATIC mp_obj_t mod_ujson_dumps(mp_obj_t obj) {
42
44
}
43
45
STATIC MP_DEFINE_CONST_FUN_OBJ_1 (mod_ujson_dumps_obj , mod_ujson_dumps );
44
46
45
- // This function implements a simple non-recursive JSON parser.
47
+ // The function below implements a simple non-recursive JSON parser.
46
48
//
47
49
// The JSON specification is at http://www.ietf.org/rfc/rfc4627.txt
48
50
// The parser here will parse any valid JSON and return the correct
@@ -52,13 +54,35 @@ STATIC MP_DEFINE_CONST_FUN_OBJ_1(mod_ujson_dumps_obj, mod_ujson_dumps);
52
54
// input is outside its specs.
53
55
//
54
56
// Most of the work is parsing the primitives (null, false, true, numbers,
55
- // strings). It does 1 pass over the input string and so is easily extended to
56
- // being able to parse from a non-seekable stream. It tries to be fast and
57
+ // strings). It does 1 pass over the input stream. It tries to be fast and
57
58
// small in code size, while not using more RAM than necessary.
58
- STATIC mp_obj_t mod_ujson_loads (mp_obj_t obj ) {
59
- mp_uint_t len ;
60
- const char * s = mp_obj_str_get_data (obj , & len );
61
- const char * top = s + len ;
59
+
60
+ typedef struct _ujson_stream_t {
61
+ mp_obj_t stream_obj ;
62
+ mp_uint_t (* read )(mp_obj_t obj , void * buf , mp_uint_t size , int * errcode );
63
+ int errcode ;
64
+ byte cur ;
65
+ } ujson_stream_t ;
66
+
67
+ #define S_EOF (0) // null is not allowed in json stream so is ok as EOF marker
68
+ #define S_END (s ) ((s).cur == S_EOF)
69
+ #define S_CUR (s ) ((s).cur)
70
+ #define S_NEXT (s ) (ujson_stream_next(&(s)))
71
+
72
+ STATIC byte ujson_stream_next (ujson_stream_t * s ) {
73
+ mp_uint_t ret = s -> read (s -> stream_obj , & s -> cur , 1 , & s -> errcode );
74
+ if (s -> errcode != 0 ) {
75
+ mp_raise_OSError (s -> errcode );
76
+ }
77
+ if (ret == 0 ) {
78
+ s -> cur = S_EOF ;
79
+ }
80
+ return s -> cur ;
81
+ }
82
+
83
+ STATIC mp_obj_t mod_ujson_load (mp_obj_t stream_obj ) {
84
+ const mp_stream_p_t * stream_p = mp_get_stream_raise (stream_obj , MP_STREAM_OP_READ );
85
+ ujson_stream_t s = {stream_obj , stream_p -> read , 0 , 0 };
62
86
vstr_t vstr ;
63
87
vstr_init (& vstr , 8 );
64
88
mp_obj_list_t stack ; // we use a list as a simple stack for nested JSON
@@ -67,64 +91,64 @@ STATIC mp_obj_t mod_ujson_loads(mp_obj_t obj) {
67
91
mp_obj_t stack_top = MP_OBJ_NULL ;
68
92
mp_obj_type_t * stack_top_type = NULL ;
69
93
mp_obj_t stack_key = MP_OBJ_NULL ;
94
+ S_NEXT (s );
70
95
for (;;) {
71
96
cont :
72
- if (s == top ) {
97
+ if (S_END ( s ) ) {
73
98
break ;
74
99
}
75
100
mp_obj_t next = MP_OBJ_NULL ;
76
101
bool enter = false;
77
- switch (* s ) {
102
+ byte cur = S_CUR (s );
103
+ S_NEXT (s );
104
+ switch (cur ) {
78
105
case ',' :
79
106
case ':' :
80
107
case ' ' :
81
108
case '\t' :
82
109
case '\n' :
83
110
case '\r' :
84
- s += 1 ;
85
111
goto cont ;
86
112
case 'n' :
87
- if (s + 3 < top && s [ 1 ] == 'u' && s [ 2 ] == 'l' && s [ 3 ] == 'l' ) {
88
- s += 4 ;
113
+ if (S_CUR ( s ) == 'u' && S_NEXT ( s ) == 'l' && S_NEXT ( s ) == 'l' ) {
114
+ S_NEXT ( s ) ;
89
115
next = mp_const_none ;
90
116
} else {
91
117
goto fail ;
92
118
}
93
119
break ;
94
120
case 'f' :
95
- if (s + 4 < top && s [ 1 ] == 'a' && s [ 2 ] == 'l' && s [ 3 ] == 's' && s [ 4 ] == 'e' ) {
96
- s += 5 ;
121
+ if (S_CUR ( s ) == 'a' && S_NEXT ( s ) == 'l' && S_NEXT ( s ) == 's' && S_NEXT ( s ) == 'e' ) {
122
+ S_NEXT ( s ) ;
97
123
next = mp_const_false ;
98
124
} else {
99
125
goto fail ;
100
126
}
101
127
break ;
102
128
case 't' :
103
- if (s + 3 < top && s [ 1 ] == 'r' && s [ 2 ] == 'u' && s [ 3 ] == 'e' ) {
104
- s += 4 ;
129
+ if (S_CUR ( s ) == 'r' && S_NEXT ( s ) == 'u' && S_NEXT ( s ) == 'e' ) {
130
+ S_NEXT ( s ) ;
105
131
next = mp_const_true ;
106
132
} else {
107
133
goto fail ;
108
134
}
109
135
break ;
110
136
case '"' :
111
137
vstr_reset (& vstr );
112
- for (s ++ ; s < top && * s != '"' ;) {
113
- byte c = * s ;
138
+ for (; ! S_END ( s ) && S_CUR ( s ) != '"' ;) {
139
+ byte c = S_CUR ( s ) ;
114
140
if (c == '\\' ) {
115
- s ++ ;
116
- c = * s ;
141
+ c = S_NEXT (s );
117
142
switch (c ) {
118
143
case 'b' : c = 0x08 ; break ;
119
144
case 'f' : c = 0x0c ; break ;
120
145
case 'n' : c = 0x0a ; break ;
121
146
case 'r' : c = 0x0d ; break ;
122
147
case 't' : c = 0x09 ; break ;
123
148
case 'u' : {
124
- if (s + 4 >= top ) { goto fail ; }
125
149
mp_uint_t num = 0 ;
126
150
for (int i = 0 ; i < 4 ; i ++ ) {
127
- c = (* ++ s | 0x20 ) - '0' ;
151
+ c = (S_NEXT ( s ) | 0x20 ) - '0' ;
128
152
if (c > 9 ) {
129
153
c -= ('a' - ('9' + 1 ));
130
154
}
@@ -137,27 +161,29 @@ STATIC mp_obj_t mod_ujson_loads(mp_obj_t obj) {
137
161
}
138
162
vstr_add_byte (& vstr , c );
139
163
str_cont :
140
- s ++ ;
164
+ S_NEXT ( s ) ;
141
165
}
142
- if (s == top ) {
166
+ if (S_END ( s ) ) {
143
167
goto fail ;
144
168
}
145
- s ++ ;
169
+ S_NEXT ( s ) ;
146
170
next = mp_obj_new_str (vstr .buf , vstr .len , false);
147
171
break ;
148
172
case '-' :
149
173
case '0' : case '1' : case '2' : case '3' : case '4' : case '5' : case '6' : case '7' : case '8' : case '9' : {
150
174
bool flt = false;
151
175
vstr_reset (& vstr );
152
- for (; s < top ; s ++ ) {
153
- if (* s == '.' || * s == 'E' || * s == 'e' ) {
176
+ for (;;) {
177
+ vstr_add_byte (& vstr , cur );
178
+ cur = S_CUR (s );
179
+ if (cur == '.' || cur == 'E' || cur == 'e' ) {
154
180
flt = true;
155
- } else if (* s == '-' || unichar_isdigit (* s )) {
181
+ } else if (cur == '-' || unichar_isdigit (cur )) {
156
182
// pass
157
183
} else {
158
184
break ;
159
185
}
160
- vstr_add_byte ( & vstr , * s );
186
+ S_NEXT ( s );
161
187
}
162
188
if (flt ) {
163
189
next = mp_parse_num_decimal (vstr .buf , vstr .len , false, false, NULL );
@@ -169,16 +195,13 @@ STATIC mp_obj_t mod_ujson_loads(mp_obj_t obj) {
169
195
case '[' :
170
196
next = mp_obj_new_list (0 , NULL );
171
197
enter = true;
172
- s += 1 ;
173
198
break ;
174
199
case '{' :
175
200
next = mp_obj_new_dict (0 );
176
201
enter = true;
177
- s += 1 ;
178
202
break ;
179
203
case '}' :
180
204
case ']' : {
181
- s += 1 ;
182
205
if (stack_top == MP_OBJ_NULL ) {
183
206
// no object at all
184
207
goto fail ;
@@ -231,10 +254,10 @@ STATIC mp_obj_t mod_ujson_loads(mp_obj_t obj) {
231
254
}
232
255
success :
233
256
// eat trailing whitespace
234
- while (s < top && unichar_isspace (* s )) {
235
- s ++ ;
257
+ while (unichar_isspace (S_CUR ( s ) )) {
258
+ S_NEXT ( s ) ;
236
259
}
237
- if (s < top ) {
260
+ if (! S_END ( s ) ) {
238
261
// unexpected chars
239
262
goto fail ;
240
263
}
@@ -248,11 +271,21 @@ STATIC mp_obj_t mod_ujson_loads(mp_obj_t obj) {
248
271
fail :
249
272
nlr_raise (mp_obj_new_exception_msg (& mp_type_ValueError , "syntax error in JSON" ));
250
273
}
274
+ STATIC MP_DEFINE_CONST_FUN_OBJ_1 (mod_ujson_load_obj , mod_ujson_load );
275
+
276
+ STATIC mp_obj_t mod_ujson_loads (mp_obj_t obj ) {
277
+ mp_uint_t len ;
278
+ const char * buf = mp_obj_str_get_data (obj , & len );
279
+ vstr_t vstr = {len , len , (char * )buf , true};
280
+ mp_obj_stringio_t sio = {{& mp_type_stringio }, & vstr , 0 };
281
+ return mod_ujson_load (& sio );
282
+ }
251
283
STATIC MP_DEFINE_CONST_FUN_OBJ_1 (mod_ujson_loads_obj , mod_ujson_loads );
252
284
253
285
STATIC const mp_rom_map_elem_t mp_module_ujson_globals_table [] = {
254
286
{ MP_ROM_QSTR (MP_QSTR___name__ ), MP_ROM_QSTR (MP_QSTR_ujson ) },
255
287
{ MP_ROM_QSTR (MP_QSTR_dumps ), MP_ROM_PTR (& mod_ujson_dumps_obj ) },
288
+ { MP_ROM_QSTR (MP_QSTR_load ), MP_ROM_PTR (& mod_ujson_load_obj ) },
256
289
{ MP_ROM_QSTR (MP_QSTR_loads ), MP_ROM_PTR (& mod_ujson_loads_obj ) },
257
290
};
258
291
0 commit comments