extmod/modujson: Implement ujson.load() to load JSON from a stream. · micropython/micropython@e93c1ca · GitHub
[go: up one dir, main page]

Skip to content

Commit e93c1ca

Browse files
committed
extmod/modujson: Implement ujson.load() to load JSON from a stream.
This refactors ujson.loads(s) to behave as ujson.load(StringIO(s)). Increase in code size is: 366 bytes for unix x86-64, 180 bytes for stmhal, 84 bytes for esp8266.
1 parent f17f331 commit e93c1ca

File tree

3 files changed

+109
-44
lines changed

3 files changed

+109
-44
lines changed

extmod/modujson.c

Lines changed: 70 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
/*
2-
* This file is part of the Micro Python project, http://micropython.org/
2+
* This file is part of the MicroPython project, http://micropython.org/
33
*
44
* The MIT License (MIT)
55
*
6-
* Copyright (c) 2014 Damien P. George
6+
* Copyright (c) 2014-2016 Damien P. George
77
*
88
* Permission is hereby granted, free of charge, to any person obtaining a copy
99
* of this software and associated documentation files (the "Software"), to deal
@@ -28,8 +28,10 @@
2828

2929
#include "py/nlr.h"
3030
#include "py/objlist.h"
31+
#include "py/objstringio.h"
3132
#include "py/parsenum.h"
3233
#include "py/runtime.h"
34+
#include "py/stream.h"
3335

3436
#if MICROPY_PY_UJSON
3537

@@ -42,7 +44,7 @@ STATIC mp_obj_t mod_ujson_dumps(mp_obj_t obj) {
4244
}
4345
STATIC MP_DEFINE_CONST_FUN_OBJ_1(mod_ujson_dumps_obj, mod_ujson_dumps);
4446

45-
// This function implements a simple non-recursive JSON parser.
47+
// The function below implements a simple non-recursive JSON parser.
4648
//
4749
// The JSON specification is at http://www.ietf.org/rfc/rfc4627.txt
4850
// The parser here will parse any valid JSON and return the correct
@@ -52,13 +54,35 @@ STATIC MP_DEFINE_CONST_FUN_OBJ_1(mod_ujson_dumps_obj, mod_ujson_dumps);
5254
// input is outside its specs.
5355
//
5456
// Most of the work is parsing the primitives (null, false, true, numbers,
55-
// strings). It does 1 pass over the input string and so is easily extended to
56-
// being able to parse from a non-seekable stream. It tries to be fast and
57+
// strings). It does 1 pass over the input stream. It tries to be fast and
5758
// small in code size, while not using more RAM than necessary.
58-
STATIC mp_obj_t mod_ujson_loads(mp_obj_t obj) {
59-
mp_uint_t len;
60-
const char *s = mp_obj_str_get_data(obj, &len);
61-
const char *top = s + len;
59+
60+
typedef struct _ujson_stream_t { // one-byte-at-a-time reader state used by the JSON parser
61+
mp_obj_t stream_obj; // stream being parsed; passed as the first argument to read()
62+
mp_uint_t (*read)(mp_obj_t obj, void *buf, mp_uint_t size, int *errcode); // read function; mod_ujson_load fills it from the stream's protocol (stream_p->read)
63+
int errcode; // error code slot handed to read(); a non-zero value makes ujson_stream_next raise OSError
64+
byte cur; // most recently read byte, or S_EOF once read() has returned 0
65+
} ujson_stream_t;
66+
67+
#define S_EOF (0) // a NUL byte is not allowed in a JSON stream, so it is safe to use as the EOF marker
68+
#define S_END(s) ((s).cur == S_EOF) // true when the current byte is the EOF marker
69+
#define S_CUR(s) ((s).cur) // current byte; does not advance the stream
70+
#define S_NEXT(s) (ujson_stream_next(&(s))) // read the next byte into (s).cur and evaluate to it
71+
72+
STATIC byte ujson_stream_next(ujson_stream_t *s) { // advance the stream by one byte and return the new current byte (S_EOF at end)
73+
mp_uint_t ret = s->read(s->stream_obj, &s->cur, 1, &s->errcode); // request exactly 1 byte, stored directly into s->cur
74+
if (s->errcode != 0) { // NOTE(review): relies on errcode starting at 0 (mod_ujson_load initialises it so) and on read() leaving it untouched on success -- confirm against the stream protocol
75+
mp_raise_OSError(s->errcode); // report the stream's error code as an OSError
76+
}
77+
if (ret == 0) { // no byte was read: treat as end of stream
78+
s->cur = S_EOF;
79+
}
80+
return s->cur;
81+
}
82+
83+
STATIC mp_obj_t mod_ujson_load(mp_obj_t stream_obj) {
84+
const mp_stream_p_t *stream_p = mp_get_stream_raise(stream_obj, MP_STREAM_OP_READ);
85+
ujson_stream_t s = {stream_obj, stream_p->read, 0, 0};
6286
vstr_t vstr;
6387
vstr_init(&vstr, 8);
6488
mp_obj_list_t stack; // we use a list as a simple stack for nested JSON
@@ -67,64 +91,64 @@ STATIC mp_obj_t mod_ujson_loads(mp_obj_t obj) {
6791
mp_obj_t stack_top = MP_OBJ_NULL;
6892
mp_obj_type_t *stack_top_type = NULL;
6993
mp_obj_t stack_key = MP_OBJ_NULL;
94+
S_NEXT(s);
7095
for (;;) {
7196
cont:
72-
if (s == top) {
97+
if (S_END(s)) {
7398
break;
7499
}
75100
mp_obj_t next = MP_OBJ_NULL;
76101
bool enter = false;
77-
switch (*s) {
102+
byte cur = S_CUR(s);
103+
S_NEXT(s);
104+
switch (cur) {
78105
case ',':
79106
case ':':
80107
case ' ':
81108
case '\t':
82109
case '\n':
83110
case '\r':
84-
s += 1;
85111
goto cont;
86112
case 'n':
87-
if (s + 3 < top && s[1] == 'u' && s[2] == 'l' && s[3] == 'l') {
88-
s += 4;
113+
if (S_CUR(s) == 'u' && S_NEXT(s) == 'l' && S_NEXT(s) == 'l') {
114+
S_NEXT(s);
89115
next = mp_const_none;
90116
} else {
91117
goto fail;
92118
}
93119
break;
94120
case 'f':
95-
if (s + 4 < top && s[1] == 'a' && s[2] == 'l' && s[3] == 's' && s[4] == 'e') {
96-
s += 5;
121+
if (S_CUR(s) == 'a' && S_NEXT(s) == 'l' && S_NEXT(s) == 's' && S_NEXT(s) == 'e') {
122+
S_NEXT(s);
97123
next = mp_const_false;
98124
} else {
99125
goto fail;
100126
}
101127
break;
102128
case 't':
103-
if (s + 3 < top && s[1] == 'r' && s[2] == 'u' && s[3] == 'e') {
104-
s += 4;
129+
if (S_CUR(s) == 'r' && S_NEXT(s) == 'u' && S_NEXT(s) == 'e') {
130+
S_NEXT(s);
105131
next = mp_const_true;
106132
} else {
107133
goto fail;
108134
}
109135
break;
110136
case '"':
111137
vstr_reset(&vstr);
112-
for (s++; s < top && *s != '"';) {
113-
byte c = *s;
138+
for (; !S_END(s) && S_CUR(s) != '"';) {
139+
byte c = S_CUR(s);
114140
if (c == '\\') {
115-
s++;
116-
c = *s;
141+
c = S_NEXT(s);
117142
switch (c) {
118143
case 'b': c = 0x08; break;
119144
case 'f': c = 0x0c; break;
120145
case 'n': c = 0x0a; break;
121146
case 'r': c = 0x0d; break;
122147
case 't': c = 0x09; break;
123148
case 'u': {
124-
if (s + 4 >= top) { goto fail; }
125149
mp_uint_t num = 0;
126150
for (int i = 0; i < 4; i++) {
127-
c = (*++s | 0x20) - '0';
151+
c = (S_NEXT(s) | 0x20) - '0';
128152
if (c > 9) {
129153
c -= ('a' - ('9' + 1));
130154
}
@@ -137,27 +161,29 @@ STATIC mp_obj_t mod_ujson_loads(mp_obj_t obj) {
137161
}
138162
vstr_add_byte(&vstr, c);
139163
str_cont:
140-
s++;
164+
S_NEXT(s);
141165
}
142-
if (s == top) {
166+
if (S_END(s)) {
143167
goto fail;
144168
}
145-
s++;
169+
S_NEXT(s);
146170
next = mp_obj_new_str(vstr.buf, vstr.len, false);
147171
break;
148172
case '-':
149173
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
150174
bool flt = false;
151175
vstr_reset(&vstr);
152-
for (; s < top; s++) {
153-
if (*s == '.' || *s == 'E' || *s == 'e') {
176+
for (;;) {
177+
vstr_add_byte(&vstr, cur);
178+
cur = S_CUR(s);
179+
if (cur == '.' || cur == 'E' || cur == 'e') {
154180
flt = true;
155-
} else if (*s == '-' || unichar_isdigit(*s)) {
181+
} else if (cur == '-' || unichar_isdigit(cur)) {
156182
// pass
157183
} else {
158184
break;
159185
}
160-
vstr_add_byte(&vstr, *s);
186+
S_NEXT(s);
161187
}
162188
if (flt) {
163189
next = mp_parse_num_decimal(vstr.buf, vstr.len, false, false, NULL);
@@ -169,16 +195,13 @@ STATIC mp_obj_t mod_ujson_loads(mp_obj_t obj) {
169195
case '[':
170196
next = mp_obj_new_list(0, NULL);
171197
enter = true;
172-
s += 1;
173198
break;
174199
case '{':
175200
next = mp_obj_new_dict(0);
176201
enter = true;
177-
s += 1;
178202
break;
179203
case '}':
180204
case ']': {
181-
s += 1;
182205
if (stack_top == MP_OBJ_NULL) {
183206
// no object at all
184207
goto fail;
@@ -231,10 +254,10 @@ STATIC mp_obj_t mod_ujson_loads(mp_obj_t obj) {
231254
}
232255
success:
233256
// eat trailing whitespace
234-
while (s < top && unichar_isspace(*s)) {
235-
s++;
257+
while (unichar_isspace(S_CUR(s))) {
258+
S_NEXT(s);
236259
}
237-
if (s < top) {
260+
if (!S_END(s)) {
238261
// unexpected chars
239262
goto fail;
240263
}
@@ -248,11 +271,21 @@ STATIC mp_obj_t mod_ujson_loads(mp_obj_t obj) {
248271
fail:
249272
nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "syntax error in JSON"));
250273
}
274+
STATIC MP_DEFINE_CONST_FUN_OBJ_1(mod_ujson_load_obj, mod_ujson_load);
275+
276+
STATIC mp_obj_t mod_ujson_loads(mp_obj_t obj) { // ujson.loads(s): parse JSON from a string by wrapping it in a temporary StringIO and calling mod_ujson_load
277+
mp_uint_t len;
278+
const char *buf = mp_obj_str_get_data(obj, &len); // pointer to, and length of, the string's data
279+
vstr_t vstr = {len, len, (char*)buf, true}; // vstr pointing at the string's own data (the pointer is reused, not copied here); NOTE(review): initialiser order presumed to be alloc, len, buf, fixed_buf -- confirm against the vstr_t definition
280+
mp_obj_stringio_t sio = {{&mp_type_stringio}, &vstr, 0}; // local StringIO object over that vstr, with pos starting at 0
281+
return mod_ujson_load(&sio); // the parser reads from the StringIO like from any other stream
282+
}
251283
STATIC MP_DEFINE_CONST_FUN_OBJ_1(mod_ujson_loads_obj, mod_ujson_loads);
252284

253285
STATIC const mp_rom_map_elem_t mp_module_ujson_globals_table[] = { // name -> object entries for the ujson module's globals
254286
{ MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_ujson) },
255287
{ MP_ROM_QSTR(MP_QSTR_dumps), MP_ROM_PTR(&mod_ujson_dumps_obj) },
288+
{ MP_ROM_QSTR(MP_QSTR_load), MP_ROM_PTR(&mod_ujson_load_obj) }, // ujson.load: parse JSON from a stream object
256289
{ MP_ROM_QSTR(MP_QSTR_loads), MP_ROM_PTR(&mod_ujson_loads_obj) }, // ujson.loads: parse JSON from a string
257290
};
258291

py/objstringio.c

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,18 +30,12 @@
3030

3131
#include "py/nlr.h"
3232
#include "py/objstr.h"
33+
#include "py/objstringio.h"
3334
#include "py/runtime.h"
3435
#include "py/stream.h"
3536

3637
#if MICROPY_PY_IO
3738

38-
typedef struct _mp_obj_stringio_t {
39-
mp_obj_base_t base;
40-
vstr_t *vstr;
41-
// StringIO has single pointer used for both reading and writing
42-
mp_uint_t pos;
43-
} mp_obj_stringio_t;
44-
4539
#if MICROPY_CPYTHON_COMPAT
4640
STATIC void check_stringio_is_open(const mp_obj_stringio_t *o) {
4741
if (o->vstr == NULL) {

py/objstringio.h

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
/*
2+
* This file is part of the MicroPython project, http://micropython.org/
3+
*
4+
* The MIT License (MIT)
5+
*
6+
* Copyright (c) 2016 Damien P. George
7+
*
8+
* Permission is hereby granted, free of charge, to any person obtaining a copy
9+
* of this software and associated documentation files (the "Software"), to deal
10+
* in the Software without restriction, including without limitation the rights
11+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12+
* copies of the Software, and to permit persons to whom the Software is
13+
* furnished to do so, subject to the following conditions:
14+
*
15+
* The above copyright notice and this permission notice shall be included in
16+
* all copies or substantial portions of the Software.
17+
*
18+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24+
* THE SOFTWARE.
25+
*/
26+
#ifndef MICROPY_INCLUDED_PY_OBJSTRINGIO_H
27+
#define MICROPY_INCLUDED_PY_OBJSTRINGIO_H
28+
29+
#include "py/obj.h"
30+
31+
typedef struct _mp_obj_stringio_t { // StringIO object layout, declared in this header so code outside py/objstringio.c (e.g. extmod/modujson.c) can build one
32+
mp_obj_base_t base; // object header; mod_ujson_loads initialises it with &mp_type_stringio
33+
vstr_t *vstr; // buffer holding the stream contents; compared against NULL by check_stringio_is_open in py/objstringio.c
34+
// StringIO has a single position used for both reading and writing
35+
mp_uint_t pos;
36+
} mp_obj_stringio_t;
37+
38+
#endif // MICROPY_INCLUDED_PY_OBJSTRINGIO_H

0 commit comments

Comments
 (0)
0