Decode escaped Unicode characters like \u00DE (issue #304, PR #791) · Cube-Line/ArduinoJson@7050ef6 · GitHub
[go: up one dir, main page]

Skip to content

Commit 7050ef6

Browse files
committed
Decode escaped Unicode characters like \u00DE (issue bblanchon#304, PR bblanchon#791)
1 parent 070cd5b commit 7050ef6

File tree

12 files changed

+270
-129
lines changed

12 files changed

+270
-129
lines changed

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
ArduinoJson: change log
22
=======================
33

4+
HEAD
5+
----
6+
7+
* Decode escaped Unicode characters like \u00DE (issue #304, PR #791)
8+
Many thanks to Daniel Schulte (aka @trilader) who implemented this feature.
9+
* Add option ARDUINOJSON_DECODE_UNICODE to enable it
10+
411
v6.8.0-beta (2019-01-30)
512
-----------
613

src/ArduinoJson/Configuration.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,11 @@
120120
#endif
121121
#endif
122122

123+
// Convert unicode escape sequence (\u0123) to UTF-8
124+
#ifndef ARDUINOJSON_DECODE_UNICODE
125+
#define ARDUINOJSON_DECODE_UNICODE 0
126+
#endif
127+
123128
// Control the exponentiation threshold for big numbers
124129
// CAUTION: cannot be more than 1e9 !!!!
125130
#ifndef ARDUINOJSON_POSITIVE_EXPONENTIATION_THRESHOLD

src/ArduinoJson/Json/JsonDeserializer.hpp

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "../Polyfills/type_traits.hpp"
1212
#include "../Variant/VariantData.hpp"
1313
#include "EscapeSequence.hpp"
14+
#include "Utf8.hpp"
1415

1516
namespace ARDUINOJSON_NAMESPACE {
1617

@@ -192,7 +193,18 @@ class JsonDeserializer {
192193
if (c == '\\') {
193194
c = current();
194195
if (c == '\0') return DeserializationError::IncompleteInput;
195-
if (c == 'u') return DeserializationError::NotSupported;
196+
if (c == 'u') {
197+
#if ARDUINOJSON_DECODE_UNICODE
198+
uint16_t codepoint;
199+
move();
200+
DeserializationError err = parseCodepoint(codepoint);
201+
if (err) return err;
202+
Utf8::encodeCodepoint(codepoint, builder);
203+
continue;
204+
#else
205+
return DeserializationError::NotSupported;
206+
#endif
207+
}
196208
// replace char
197209
c = EscapeSequence::unescapeChar(c);
198210
if (c == '\0') return DeserializationError::InvalidInput;
@@ -256,6 +268,19 @@ class JsonDeserializer {
256268
return DeserializationError::Ok;
257269
}
258270

271+
// Parses the four hexadecimal digits that follow "\u" into `codepoint`.
//
// `codepoint` is reset to zero, then each digit is shifted in from the most
// significant nibble down, so it holds a partial value if parsing stops early.
//
// Returns:
//  - IncompleteInput if current() yields '\0' before four digits were read,
//  - InvalidInput if decodeHex() reports a value above 0x0F for a character,
//  - Ok once four digits were consumed with move().
DeserializationError parseCodepoint(uint16_t &codepoint) {
  codepoint = 0;
  for (uint8_t remaining = 4; remaining != 0; --remaining) {
    const char ch = current();
    if (ch == '\0') return DeserializationError::IncompleteInput;

    const uint8_t nibble = decodeHex(ch);
    if (nibble >= 0x10) return DeserializationError::InvalidInput;

    codepoint = uint16_t((codepoint << 4) | nibble);
    move();
  }
  return DeserializationError::Ok;
}
283+
259284
// Tells whether `c` lies inside the inclusive range [min, max].
static inline bool isBetween(char c, char min, char max) {
  const bool belowRange = c < min;
  const bool aboveRange = max < c;
  return !(belowRange || aboveRange);
}
@@ -269,6 +294,12 @@ class JsonDeserializer {
269294
return c == '\'' || c == '\"';
270295
}
271296

297+
// Converts one hexadecimal digit ('0'-'9', 'a'-'f', 'A'-'F') to its value
// in the range 0..15.
//
// Any other character yields 0xFF, so callers can reject it with a
// `value > 0x0F` test. Each range is checked explicitly: relying on
// `c < 'A'` or on clearing bit 0x20 lets characters such as ':' .. '?' and
// '`' slip through as valid digits.
static inline uint8_t decodeHex(char c) {
  if (c >= '0' && c <= '9') return uint8_t(c - '0');
  if (c >= 'a' && c <= 'f') return uint8_t(c - 'a' + 10);
  if (c >= 'A' && c <= 'F') return uint8_t(c - 'A' + 10);
  return 0xFF;  // not a hexadecimal digit
}
302+
272303
DeserializationError skipSpacesAndComments() {
273304
for (;;) {
274305
switch (current()) {

src/ArduinoJson/Json/Utf8.hpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
// ArduinoJson - arduinojson.org
2+
// Copyright Benoit Blanchon 2014-2018
3+
// MIT License
4+
5+
#pragma once
6+
7+
namespace ARDUINOJSON_NAMESPACE {
8+
9+
namespace Utf8 {
10+
// Appends the UTF-8 encoding of a 16-bit code point to `str`, one byte at a
// time through `str.append(char)`.
//
//   codepoint <  0x80   -> 1 byte  (0xxxxxxx)
//   codepoint <  0x800  -> 2 bytes (110xxxxx 10xxxxxx)
//   otherwise           -> 3 bytes (1110xxxx 10xxxxxx 10xxxxxx)
//
// No special handling is done for values in the surrogate range
// (0xD800-0xDFFF); they are encoded like any other 3-byte value.
template <typename TStringBuilder>
inline void encodeCodepoint(uint16_t codepoint, TStringBuilder &str) {
  if (codepoint < 0x80) {
    str.append(char(codepoint));
    return;
  }

  // Every multi-byte form ends with a continuation byte carrying the low
  // six bits.
  const char lastByte = char(0x80 | (codepoint & 0x3f));

  if (codepoint < 0x00000800) {
    str.append(char(0xc0 | (codepoint >> 6)));
  } else {
    str.append(char(0xe0 | (codepoint >> 12)));
    str.append(char(0x80 | ((codepoint >> 6) & 0x3f)));
  }
  str.append(lastByte);
}
25+
} // namespace Utf8
26+
} // namespace ARDUINOJSON_NAMESPACE

src/ArduinoJson/Namespace.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,5 @@
1919
#define ARDUINOJSON_NAMESPACE \
2020
ARDUINOJSON_CONCAT8(ArduinoJson, ARDUINOJSON_VERSION_MAJOR, \
2121
ARDUINOJSON_VERSION_MINOR, ARDUINOJSON_VERSION_REVISION, \
22-
_, ARDUINOJSON_USE_LONG_LONG, _, ARDUINOJSON_USE_DOUBLE)
22+
_, ARDUINOJSON_USE_LONG_LONG, ARDUINOJSON_USE_DOUBLE, \
23+
ARDUINOJSON_DECODE_UNICODE)

test/JsonDeserializer/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ add_executable(JsonDeserializerTests
99
deserializeJsonObject.cpp
1010
deserializeJsonObjectStatic.cpp
1111
deserializeJsonValue.cpp
12+
deserializeJsonString.cpp
1213
input_types.cpp
1314
nestingLimit.cpp
1415
)

test/JsonDeserializer/deserializeJsonObject.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,12 @@ TEST_CASE("deserialize JSON object") {
272272

273273
REQUIRE(err == DeserializationError::Ok);
274274
}
275+
276+
SECTION("Repeated key") {
277+
DeserializationError err = deserializeJson(doc, "{a:{b:{c:1}},a:2}");
278+
279+
REQUIRE(err == DeserializationError::Ok);
280+
}
275281
}
276282

277283
SECTION("Block comments") {
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
// ArduinoJson - arduinojson.org
2+
// Copyright Benoit Blanchon 2014-2018
3+
// MIT License
4+
5+
#define ARDUINOJSON_DECODE_UNICODE 1
6+
#include <ArduinoJson.h>
7+
#include <catch.hpp>
8+
9+
using namespace Catch::Matchers;
10+
11+
TEST_CASE("Valid JSON strings value") {
12+
struct TestCase {
13+
const char* input;
14+
const char* expectedOutput;
15+
};
16+
17+
TestCase testCases[] = {
18+
{"\"hello world\"", "hello world"},
19+
{"\'hello world\'", "hello world"},
20+
{"\"1\\\"2\\\\3\\/4\\b5\\f6\\n7\\r8\\t9\"", "1\"2\\3/4\b5\f6\n7\r8\t9"},
21+
{"'\\u0041'", "A"},
22+
{"'\\u00e4'", "\xc3\xa4"}, // ä
23+
{"'\\u00E4'", "\xc3\xa4"}, // ä
24+
{"'\\u3042'", "\xe3\x81\x82"}, // あ
25+
26+
};
27+
const size_t testCount = sizeof(testCases) / sizeof(testCases[0]);
28+
29+
DynamicJsonDocument doc(4096);
30+
31+
for (size_t i = 0; i < testCount; i++) {
32+
const TestCase& testCase = testCases[i];
33+
CAPTURE(testCase.input);
34+
DeserializationError err = deserializeJson(doc, testCase.input);
35+
REQUIRE(err == DeserializationError::Ok);
36+
REQUIRE(doc.as<std::string>() == testCase.expectedOutput);
37+
}
38+
}
39+
40+
TEST_CASE("Truncated JSON string") {
41+
const char* testCases[] = {"\"hello", "\'hello", "'\\u", "'\\u00", "'\\u000"};
42+
const size_t testCount = sizeof(testCases) / sizeof(testCases[0]);
43+
44+
DynamicJsonDocument doc(4096);
45+
46+
for (size_t i = 0; i < testCount; i++) {
47+
const char* input = testCases[i];
48+
CAPTURE(input);
49+
REQUIRE(deserializeJson(doc, input) ==
50+
DeserializationError::IncompleteInput);
51+
}
52+
}
53+
54+
TEST_CASE("Invalid JSON string") {
55+
const char* testCases[] = {"'\\u'", "'\\u000g'", "'\\u000'",
56+
"'\\u000G'", "'\\u000/'", "\\x1234"};
57+
const size_t testCount = sizeof(testCases) / sizeof(testCases[0]);
58+
59+
DynamicJsonDocument doc(4096);
60+
61+
for (size_t i = 0; i < testCount; i++) {
62+
const char* input = testCases[i];
63+
CAPTURE(input);
64+
REQUIRE(deserializeJson(doc, input) == DeserializationError::InvalidInput);
65+
}
66+
}

0 commit comments

Comments
 (0)
0