From 4bfa630a8bc348d7208065676fdeddb6f4e9e187 Mon Sep 17 00:00:00 2001 From: cpjulia Date: Sat, 18 Sep 2021 13:45:43 -0300 Subject: [PATCH 01/18] Added parser for retaining or escaping control and unicode characters and added unit test --- lib/CMakeLists.txt | 1 + lib/Logger/Escaper.cpp | 201 +++++++++++++++++++++++++++++++++ lib/Logger/Escaper.h | 69 +++++++++++ lib/Logger/LogAppenderFile.cpp | 47 ++++---- lib/Logger/LogAppenderFile.h | 5 +- lib/Logger/Logger.cpp | 17 ++- lib/Logger/Logger.h | 9 +- lib/Logger/LoggerFeature.cpp | 13 ++- lib/Logger/LoggerFeature.h | 3 +- tests/CMakeLists.txt | 5 +- tests/Logger/EscaperTest.cpp | 142 +++++++++++++++++++++++ 11 files changed, 473 insertions(+), 39 deletions(-) create mode 100644 lib/Logger/Escaper.cpp create mode 100644 lib/Logger/Escaper.h create mode 100644 tests/Logger/EscaperTest.cpp diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 4d82bb8356b8..efb9a592104f 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -159,6 +159,7 @@ add_library(arango STATIC Endpoint/EndpointIpV6.cpp Endpoint/EndpointList.cpp Futures/Future.cpp + Logger/Escaper.cpp Logger/LogAppender.cpp Logger/LogAppenderFile.cpp Logger/LogAppenderSyslog.cpp diff --git a/lib/Logger/Escaper.cpp b/lib/Logger/Escaper.cpp new file mode 100644 index 000000000000..5a8bb98af095 --- /dev/null +++ b/lib/Logger/Escaper.cpp @@ -0,0 +1,201 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2014-2021 ArangoDB GmbH, Cologne, Germany +/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Achim Brandt +/// @author Dr. Frank Celler +//////////////////////////////////////////////////////////////////////////////// + +#include "Escaper.h" +#include + +namespace arangodb { + +void ControlCharsSuppressor::writeCharIntoOutputBuffer(uint32_t c, char*& output, int numBytes) { + *output++ = ' '; +} + +void ControlCharsEscaper::writeCharIntoOutputBuffer(uint32_t c, char*& output, int numBytes) { + switch (c) { + case '\n': + *output++ = '\\'; + *output++ = 'n'; + break; + + case '\r': + *output++ = '\\'; + *output++ = 'r'; + break; + + case '\t': + *output++ = '\\'; + *output++ = 't'; + break; + + default: { + uint8_t n1 = c >> 4; + uint8_t n2 = c & 0x0F; + + *output++ = '\\'; + *output++ = 'x'; + *output++ = (n1 < 10) ? ('0' + n1) : ('A' + n1 - 10); + *output++ = (n2 < 10) ? ('0' + n2) : ('A' + n2 - 10); + } + } +} + +void UnicodeCharsRetainer::writeCharIntoOutputBuffer(uint32_t c, char*& output, int numBytes) { + if (numBytes == 2) { + uint16_t num1 = c & 0xffff; + uint8_t num2 = ((num1 >> 6) & 0x1f) | 0xc0; + uint8_t num3 = (num1 & 0x3f) | 0x80; + *output++ = num2; + *output++ = num3; + } else if (numBytes == 3) { + uint16_t num1 = c & 0xffff; + uint8_t num2 = ((num1 >> 12) & 0x0f) | 0xe0; + uint8_t num3 = ((num1 >> 6) & 0x3f) | 0x80; + uint8_t num4 = (num1 & 0x3f) | 0x80; + *output++ = num2; + *output++ = num3; + *output++ = num4; + } else if (numBytes == 4) { + *output++ = ((c >> 18) & 0x07) | 0xF0; + *output++ = ((c >> 12) & 0x3f) | 0x80; + *output++ = ((c >> 6) & 0x3f) | 0x80; + *output++ = (c & 0x3f) | 0x80; + } +} + +void UnicodeCharsEscaper::writeCharIntoOutputBuffer(uint32_t c, char*& output, int numBytes) { + *output++ = '\\'; + *output++ = 'u'; + + uint16_t i1 = (c & 0xF000) >> 12; + uint16_t i2 = (c & 0x0F00) >> 8; + uint16_t i3 = (c & 0x00F0) >> 4; + uint16_t i4 = (c & 0x000F); + + *output++ = (i1 < 10) ? ('0' + i1) : ('A' + i1 - 10); + *output++ = (i2 < 10) ? ('0' + i2) : ('A' + i2 - 10); + *output++ = (i3 < 10) ? ('0' + i3) : ('A' + i3 - 10); + *output++ = (i4 < 10) ? ('0' + i4) : ('A' + i4 - 10); +} + +template +size_t Escaper::determineOutputBufferSize(std::string const& message) const { + return message.size() * std::max(this->_controlHandler.maxCharLength(), + this->_unicodeHandler.maxCharLength()); +} + +template +void Escaper::writeIntoOutputBuffer(std::string const& message, char*& buffer) { + unsigned char const* p = reinterpret_cast(message.data()); + unsigned char const* end = p + message.length(); + while (p < end) { + unsigned char c = *p; + if (c < 128) { + if (c < 0x20) { + this->_controlHandler.writeCharIntoOutputBuffer(c, buffer, 1); + } else { + *buffer++ = c; + } + // single byte + p++; + } else if (c < 224) { + if ((p + 1) >= end) { + *buffer++ = '?'; + break; + } + uint8_t d = (uint8_t) * (p + 1); + if ((d & 0xC0) == 0x80) { + this->_unicodeHandler.writeCharIntoOutputBuffer(((c & 0x1F) << 6) | (d & 0x3F), buffer, 2); + ++p; + } else { + *buffer++ = '?'; + break; + } + p++; + } else if (c < 240) { + if ((p + 2) >= end) { + *buffer++ = '?'; + break; + } + uint8_t d = (uint8_t) * (p + 1); + if ((d & 0xC0) == 0x80) { + ++p; + uint8_t e = (uint8_t) * (p + 1); + if ((e & 0xC0) == 0x80) { + ++p; + this->_unicodeHandler.writeCharIntoOutputBuffer(((c & 0x0F) << 12) | ((d & 0x3F) << 6) | (e & 0x3F), buffer, 3); + } else { + *buffer++ = '?'; + // break; + } + } else { + *buffer++ = '?'; + // break; + } + p++; + } else if (c < 248) { + if ((p + 3) >= end) { + *buffer++ = '?'; + break; + } + uint8_t d = (uint8_t) * (p + 1); + if ((d & 0xC0) == 0x80) { + ++p; + uint8_t e = (uint8_t) * (p + 1); + if ((e & 0xC0) == 0x80) { + ++p; + uint8_t f = (uint8_t) * (p + 1); + if((f & 0xC0) == 0x80) { + p++; + this->_unicodeHandler.writeCharIntoOutputBuffer( + ((c & 0x07) << 18) | ((d & 0x3F) << 12) | ((e & 0x3F) << 6) | (f & 0x3F), buffer, 4); + } else { + *buffer++ = '?'; + // break; + } + } else { + *buffer++ = '?'; + //break; + } + } else { + *buffer++ = '?'; + // break; + } + p++; + } else { + *buffer++ = '?'; + // invalid UTF-8 sequence + break; + } + } +} + +template class Escaper; + +template class Escaper; + +template class Escaper; + +template class Escaper; + +} // namespace arangodb diff --git a/lib/Logger/Escaper.h b/lib/Logger/Escaper.h new file mode 100644 index 000000000000..4481697efbc1 --- /dev/null +++ b/lib/Logger/Escaper.h @@ -0,0 +1,69 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2014-2021 ArangoDB GmbH, Cologne, Germany +/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Dr. Frank Celler +//////////////////////////////////////////////////////////////////////////////// + + +#pragma once +#include +#include +#include + +namespace arangodb { + +struct ControlCharsSuppressor { //control chars that will not be escaped + size_t maxCharLength() const { return 1; } + void writeCharIntoOutputBuffer(uint32_t c, char*& output, int numBytes); +}; +struct ControlCharsEscaper { //\x07 worst case + size_t maxCharLength() const { return 4; } + void writeCharIntoOutputBuffer(uint32_t c, char*& output, int numBytes); +}; +struct UnicodeCharsRetainer { //worst case 4 digits + size_t maxCharLength() const { return 4; } + void writeCharIntoOutputBuffer(uint32_t c, char*& output, int numBytes); +}; +struct UnicodeCharsEscaper { //\u +4 digits + size_t maxCharLength() const { return 6; } + void writeCharIntoOutputBuffer(uint32_t c, char*& output, int numBytes); +}; + +class GeneralEscaper { + public: + virtual ~GeneralEscaper() = default; + virtual size_t determineOutputBufferSize(std::string const& message) const = 0; + virtual void writeIntoOutputBuffer(std::string const& message, char*& buffer) = 0; +}; + +template +class Escaper : public GeneralEscaper { + private: + ControlCharHandler _controlHandler; + UnicodeCharHandler _unicodeHandler; + public: + + size_t determineOutputBufferSize(std::string const& message) const override; + + void writeIntoOutputBuffer(std::string const& message, char*& buffer) override; +}; + +} + diff --git a/lib/Logger/LogAppenderFile.cpp b/lib/Logger/LogAppenderFile.cpp index ebbfb7ed3ccf..bd02ab90e0a0 100644 --- a/lib/Logger/LogAppenderFile.cpp +++ b/lib/Logger/LogAppenderFile.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include "Basics/operating-system.h" @@ -58,35 +59,33 @@ LogAppenderStream::LogAppenderStream(std::string const& filename, int fd) _bufferSize(0), _fd(fd), _useColors(false), - _escape(Logger::getUseEscaped()) {} + _controlEscape(Logger::getUseControlEscaped()), + _unicodeEscape(Logger::getUseUnicodeEscaped()) { + if (_controlEscape) { + if (_unicodeEscape) { + _escaper = std::make_unique>(); + } else { + _escaper = std::make_unique>(); + } + } else { + if (_unicodeEscape) { + _escaper = std::make_unique>(); + } else { + _escaper = std::make_unique>(); + } + } +} size_t LogAppenderStream::determineOutputBufferSize(std::string const& message) const { - if (_escape) { - return TRI_MaxLengthEscapeControlsCString(message.size()); - } - return message.size() + 2; + return _escaper->determineOutputBufferSize(message)+ 2; //+2 bytes because it needs to end with '\n' and '\0' } size_t LogAppenderStream::writeIntoOutputBuffer(std::string const& message) { - if (_escape) { - size_t escapedLength = 0; - // this is guaranteed to succeed given that we already have a buffer - TRI_EscapeControlsCString(message.data(), message.size(), _buffer.get(), - &escapedLength, true); - return escapedLength; - } - - unsigned char const* p = reinterpret_cast(message.data()); - unsigned char const* e = p + message.size(); - char* s = _buffer.get(); - char* q = s; - while (p < e) { - unsigned char c = *p++; - *q++ = c < 0x20 ? ' ' : c; - } - *q++ = '\n'; - *q = '\0'; - return q - s; + char* output = _buffer.get(); + _escaper->writeIntoOutputBuffer(message, output); + *output++ = '\n'; + *output = '\0'; + return (output - _buffer.get()); } void LogAppenderStream::logMessage(LogMessage const& message) { diff --git a/lib/Logger/LogAppenderFile.h b/lib/Logger/LogAppenderFile.h index 56cfe3588937..e3023ac38803 100644 --- a/lib/Logger/LogAppenderFile.h +++ b/lib/Logger/LogAppenderFile.h @@ -32,6 +32,7 @@ #include "Logger/LogAppender.h" #include "Logger/LogLevel.h" +#include "Logger/Escaper.h" namespace arangodb { struct LogMessage; @@ -78,7 +79,9 @@ class LogAppenderStream : public LogAppender { bool _useColors; /// @brief whether or not to escape special chars in log output - bool const _escape; + bool const _controlEscape; + bool const _unicodeEscape; + std::unique_ptr _escaper; }; class LogAppenderFile : public LogAppenderStream { diff --git a/lib/Logger/Logger.cpp b/lib/Logger/Logger.cpp index 388e570d0973..11c99c79d3ee 100644 --- a/lib/Logger/Logger.cpp +++ b/lib/Logger/Logger.cpp @@ -120,7 +120,8 @@ bool Logger::_showProcessIdentifier(true); bool Logger::_showThreadIdentifier(false); bool Logger::_showThreadName(false); bool Logger::_useColor(true); -bool Logger::_useEscaped(true); +bool Logger::_useControlEscaped(true); +bool Logger::_useUnicodeEscaped(false); bool Logger::_keepLogRotate(false); bool Logger::_logRequestParameters(true); bool Logger::_showRole(false); @@ -308,14 +309,22 @@ void Logger::setUseColor(bool value) { } // NOTE: this function should not be called if the logging is active. -void Logger::setUseEscaped(bool value) { +void Logger::setUseControlEscaped(bool value) { if (_active) { THROW_ARANGO_EXCEPTION_MESSAGE( TRI_ERROR_INTERNAL, "cannot change settings once logging is active"); + } + _useControlEscaped = value; + } + // NOTE: this function should not be called if the logging is active. + void Logger::setUseUnicodeEscaped(bool value) { + if (_active) { + THROW_ARANGO_EXCEPTION_MESSAGE( + TRI_ERROR_INTERNAL, "cannot change settings once logging is active"); + } + _useUnicodeEscaped = value; } - _useEscaped = value; -} // NOTE: this function should not be called if the logging is active. void Logger::setShowRole(bool show) { diff --git a/lib/Logger/Logger.h b/lib/Logger/Logger.h index 8750fd41092f..1ebe23b4b77c 100644 --- a/lib/Logger/Logger.h +++ b/lib/Logger/Logger.h @@ -277,8 +277,10 @@ class Logger { static void setShowThreadName(bool); static void setUseColor(bool); static bool getUseColor() { return _useColor; }; - static void setUseEscaped(bool); - static bool getUseEscaped() { return _useEscaped; }; + static void setUseControlEscaped(bool); + static void setUseUnicodeEscaped(bool); + static bool getUseControlEscaped() { return _useControlEscaped; }; + static bool getUseUnicodeEscaped() { return _useUnicodeEscaped; }; static bool getUseLocalTime() { return LogTimeFormats::isLocalFormat(_timeFormat); } static void setTimeFormat(LogTimeFormats::TimeFormat); static void setKeepLogrotate(bool); @@ -330,7 +332,8 @@ class Logger { static bool _showThreadName; static bool _showRole; static bool _useColor; - static bool _useEscaped; + static bool _useControlEscaped; + static bool _useUnicodeEscaped; static bool _keepLogRotate; static bool _logRequestParameters; static bool _showIds; diff --git a/lib/Logger/LoggerFeature.cpp b/lib/Logger/LoggerFeature.cpp index 400d7ed07c5a..9a2db515172f 100644 --- a/lib/Logger/LoggerFeature.cpp +++ b/lib/Logger/LoggerFeature.cpp @@ -96,6 +96,7 @@ LoggerFeature::~LoggerFeature() { void LoggerFeature::collectOptions(std::shared_ptr options) { options->addOldOption("log.tty", "log.foreground-tty"); + options->addOldOption("log.escape", "log.escape-control-chars"); options ->addOption("--log", "the global or topic-specific log level", @@ -109,8 +110,12 @@ void LoggerFeature::collectOptions(std::shared_ptr options) { new BooleanParameter(&_useColor), arangodb::options::makeDefaultFlags(arangodb::options::Flags::Dynamic)); - options->addOption("--log.escape", "escape characters when logging", - new BooleanParameter(&_useEscaped)); + options->addOption("--log.escape-control-chars", "escape control characters when logging", + new BooleanParameter(&_useControlEscaped)) + .setIntroducedIn(30900); + options->addOption("--log.escape-unicode-chars", "escape unicode characters when logging", + new BooleanParameter(&_useUnicodeEscaped)) + .setIntroducedIn(30900); options->addOption( "--log.output,-o", @@ -393,7 +398,9 @@ void LoggerFeature::prepare() { Logger::setShowRole(_showRole); Logger::setUseColor(_useColor); Logger::setTimeFormat(LogTimeFormats::formatFromName(_timeFormatString)); - Logger::setUseEscaped(_useEscaped); + //Logger::setUseEscaped(_useEscaped); + Logger::setUseControlEscaped(_useControlEscaped); + Logger::setUseUnicodeEscaped(_useUnicodeEscaped); Logger::setShowLineNumber(_lineNumber); Logger::setShortenFilenames(_shortenFilenames); Logger::setShowProcessIdentifier(_processId); diff --git a/lib/Logger/LoggerFeature.h b/lib/Logger/LoggerFeature.h index 21ac9c3c9b61..34a888024f05 100644 --- a/lib/Logger/LoggerFeature.h +++ b/lib/Logger/LoggerFeature.h @@ -70,7 +70,8 @@ class LoggerFeature final : public application_features::ApplicationFeature { bool _useJson = false; bool _useLocalTime = false; bool _useColor = true; - bool _useEscaped = true; + bool _useControlEscaped = true; + bool _useUnicodeEscaped = true; bool _lineNumber = false; bool _shortenFilenames = true; bool _processId = true; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 048892c92c05..b33fe931843d 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -247,6 +247,7 @@ set(ARANGODB_TESTS_SOURCES Graph/SingleServerProviderTest.cpp Greenspun/PrimitivesTest.cpp HotBackup/HotBackupCoordinatorTest.cpp + Logger/EscaperTest.cpp ${ARANGODB_IRESEARCH_TESTS_SOURCES} Maintenance/MaintenanceFeatureTest.cpp Maintenance/MaintenanceRestHandlerTest.cpp @@ -291,9 +292,7 @@ set(ARANGODB_TESTS_SOURCES VPackDeserializer/BasicTests.cpp Zkd/Conversion.cpp Zkd/Library.cpp - ${ADDITIONAL_TEST_SOURCES} -) - + ${ADDITIONAL_TEST_SOURCES}) set(ARANGODB_REPLICATION2_TEST_SOURCES Replication2/ReplicatedLog/AppendEntriesBatchTest.cpp diff --git a/tests/Logger/EscaperTest.cpp b/tests/Logger/EscaperTest.cpp new file mode 100644 index 000000000000..577c765835d5 --- /dev/null +++ b/tests/Logger/EscaperTest.cpp @@ -0,0 +1,142 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2014-2020 ArangoDB GmbH, Cologne, Germany +/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Jan Steemann +/// @author Copyright 2015, ArangoDB GmbH, Cologne, Germany +//////////////////////////////////////////////////////////////////////////////// + +#include "Basics/Common.h" + +#include "gtest/gtest.h" + +#include "Logger/Escaper.h" +#include "Logger/Logger.h" +#include "Logger/LogMacros.h" + +#include +#include +#include +#include + +#ifdef TRI_HAVE_UNISTD_H +#include +#endif + +using namespace arangodb; + +// ----------------------------------------------------------------------------- +// --SECTION-- test suite +// ----------------------------------------------------------------------------- + +template +void verifyExpectedValues(std::string const& inputString, std::string const& expectedOutput, size_t expectedSize, EscaperType& escaper) { + //LOG_DEVEL << "verifyExpectedValues"; + size_t messageSize = escaper.determineOutputBufferSize(inputString); + LOG_DEVEL << "size " << messageSize; + EXPECT_EQ(messageSize, expectedSize); + auto buffer = std::make_unique(messageSize); + char* output = buffer.get(); + escaper.writeIntoOutputBuffer(inputString, output); + size_t outputBufferSize = output - buffer.get(); +// LOG_DEVEL << "output size " << outputBufferSize; + std::string outputString(buffer.get(), outputBufferSize); +// LOG_DEVEL << "output " << outputString << " " << outputString.size(); + EXPECT_EQ(outputString.compare(expectedOutput), 0); + EXPECT_EQ(outputString, expectedOutput); +} + +TEST(EscaperTest, test_suppress_control_retain_unicode) { + Escaper escaper; + // LOG_DEVEL << "SuppressControlRetainUnicode"; + verifyExpectedValues("€", "€", 12, escaper); + verifyExpectedValues(" € ", " € ", 24, escaper); + verifyExpectedValues("mötör", "mötör", 28, escaper); + verifyExpectedValues("\t mötör", " mötör", 36, escaper); + verifyExpectedValues("maçã", "maçã", 24, escaper); + verifyExpectedValues("\nmaçã", " maçã", 28, escaper); + verifyExpectedValues("犬", "犬", 12, escaper); + verifyExpectedValues("犬\r", "犬 ", 16, escaper); + verifyExpectedValues("", "", 0, escaper); + verifyExpectedValues("a", "a", 4, escaper); + std::string validUnicode = "€"; + verifyExpectedValues(validUnicode.substr(0, 1), "?", 4, escaper); + verifyExpectedValues(validUnicode.substr(0, 1) + "\n", "? ", 8, escaper); + verifyExpectedValues("\x07", " ", 4, escaper); + verifyExpectedValues(std::string("\0", 1), " ", 4, escaper); + //invalid unicode: '\ufffe', '\U110000','\ud800', 'test\xFE' + } + +TEST(EscaperTest, test_suppress_control_escape_unicode) { + Escaper escaper; + verifyExpectedValues("€", "\\u20AC", 18, escaper); + verifyExpectedValues(" € ", " \\u20AC ", 36, escaper); + verifyExpectedValues("mötör", "m\\u00F6t\\u00F6r", 42, escaper); + verifyExpectedValues("\tmötör", " m\\u00F6t\\u00F6r", 48, escaper); + verifyExpectedValues("maçã", "ma\\u00E7\\u00E3", 36, escaper); + verifyExpectedValues("\nmaçã", " ma\\u00E7\\u00E3", 42, escaper); + verifyExpectedValues("犬", "\\u72AC", 18, escaper); + verifyExpectedValues("犬\r", "\\u72AC ", 24, escaper); + verifyExpectedValues("", "", 0, escaper); + verifyExpectedValues("a", "a", 6, escaper); + std::string validUnicode = "€"; + verifyExpectedValues(validUnicode.substr(0, 1), "?", 6, escaper); + verifyExpectedValues(validUnicode.substr(0, 1) + "\n", "? ", 12, escaper); + verifyExpectedValues("\x07", " ", 6, escaper); + verifyExpectedValues(std::string("\0", 1), " ", 6, escaper); + +} +TEST(EscaperTest, test_escape_control_retain_unicode) { + Escaper escaper; + verifyExpectedValues("€", "€", 12, escaper); + verifyExpectedValues(" € ", " € ", 24, escaper); + verifyExpectedValues("mötör", "mötör", 28, escaper); + verifyExpectedValues("\tmötör", "\\tmötör", 32, escaper); + verifyExpectedValues("maçã", "maçã", 24, escaper); + verifyExpectedValues("\nmaçã", "\\nmaçã", 28, escaper); + verifyExpectedValues("犬", "犬", 12, escaper); + verifyExpectedValues("犬\r", "犬\\r", 16, escaper); + verifyExpectedValues("", "", 0, escaper); + verifyExpectedValues("a", "a", 4, escaper); + std::string validUnicode = "€"; + verifyExpectedValues(validUnicode.substr(0, 1), "?", 4, escaper); + verifyExpectedValues(validUnicode.substr(0, 1) + "\n", "?\\n", 8, escaper); + verifyExpectedValues("\x07", "\\x07", 4, escaper); + verifyExpectedValues(std::string("\0", 1), "\\x00", 4, escaper); +} +TEST(EscaperTest, test_escape_control_escape_unicode) { + Escaper escaper; + verifyExpectedValues("€", "\\u20AC", 18, escaper); + verifyExpectedValues(" € ", " \\u20AC ", 36, escaper); + verifyExpectedValues("mötör", "m\\u00F6t\\u00F6r", 42, escaper); + verifyExpectedValues("\tmötör", "\\tm\\u00F6t\\u00F6r", 48, escaper); + verifyExpectedValues("maçã", "ma\\u00E7\\u00E3", 36, escaper); + verifyExpectedValues("\nmaçã", "\\nma\\u00E7\\u00E3", 42, escaper); + verifyExpectedValues("犬", "\\u72AC", 18, escaper); + verifyExpectedValues("犬\r", "\\u72AC\\r", 24, escaper); + verifyExpectedValues("", "", 0, escaper); + verifyExpectedValues("a", "a", 6, escaper); + std::string validUnicode = "€"; + verifyExpectedValues(validUnicode.substr(0, 1), "?", 6, escaper); + verifyExpectedValues(validUnicode.substr(0, 1) + "\n", "?\\n", 12, escaper); + verifyExpectedValues("\x07", "\\x07", 6, escaper); + verifyExpectedValues(std::string("\0", 1), "\\x00", 6, escaper); +} + + From feb58b7d085bea43bf6d9bf2f48b4e43a3121d38 Mon Sep 17 00:00:00 2001 From: cpjulia Date: Mon, 20 Sep 2021 01:59:32 -0300 Subject: [PATCH 02/18] Added unicode escaping for 4 bytes representation, parsing for broken unicode and more unit tests --- lib/Logger/Escaper.cpp | 71 ++++++++++++++++++++---------------- lib/Logger/Escaper.h | 1 + tests/Logger/EscaperTest.cpp | 59 +++++++++++++++++------------- 3 files changed, 75 insertions(+), 56 deletions(-) diff --git a/lib/Logger/Escaper.cpp b/lib/Logger/Escaper.cpp index 5a8bb98af095..cac890b02114 100644 --- a/lib/Logger/Escaper.cpp +++ b/lib/Logger/Escaper.cpp @@ -23,7 +23,6 @@ //////////////////////////////////////////////////////////////////////////////// #include "Escaper.h" -#include namespace arangodb { @@ -63,18 +62,13 @@ void ControlCharsEscaper::writeCharIntoOutputBuffer(uint32_t c, char*& output, i void UnicodeCharsRetainer::writeCharIntoOutputBuffer(uint32_t c, char*& output, int numBytes) { if (numBytes == 2) { uint16_t num1 = c & 0xffff; - uint8_t num2 = ((num1 >> 6) & 0x1f) | 0xc0; - uint8_t num3 = (num1 & 0x3f) | 0x80; - *output++ = num2; - *output++ = num3; + *output++ = ((num1 >> 6) & 0x1f) | 0xc0; + *output++ = (num1 & 0x3f) | 0x80; } else if (numBytes == 3) { uint16_t num1 = c & 0xffff; - uint8_t num2 = ((num1 >> 12) & 0x0f) | 0xe0; - uint8_t num3 = ((num1 >> 6) & 0x3f) | 0x80; - uint8_t num4 = (num1 & 0x3f) | 0x80; - *output++ = num2; - *output++ = num3; - *output++ = num4; + *output++ = ((num1 >> 12) & 0x0f) | 0xe0; + *output++ = ((num1 >> 6) & 0x3f) | 0x80; + *output++ = (num1 & 0x3f) | 0x80; } else if (numBytes == 4) { *output++ = ((c >> 18) & 0x07) | 0xF0; *output++ = ((c >> 12) & 0x3f) | 0x80; @@ -83,7 +77,7 @@ void UnicodeCharsRetainer::writeCharIntoOutputBuffer(uint32_t c, char*& output, } } -void UnicodeCharsEscaper::writeCharIntoOutputBuffer(uint32_t c, char*& output, int numBytes) { +void UnicodeCharsEscaper::writeCharHelper(uint16_t c, char*& output) { *output++ = '\\'; *output++ = 'u'; @@ -98,14 +92,28 @@ void UnicodeCharsEscaper::writeCharIntoOutputBuffer(uint32_t c, char*& output, i *output++ = (i4 < 10) ? ('0' + i4) : ('A' + i4 - 10); } +void UnicodeCharsEscaper::writeCharIntoOutputBuffer(uint32_t c, char*& output, int numBytes) { + if (numBytes == 4) { + c -= 0x10000U; + uint16_t high = (uint16_t) (((c & 0xffc00U) >> 10) + 0xd800); + writeCharHelper(high, output); + uint16_t low = (c & 0x3ffU) + 0xdc00U; + writeCharHelper(low, output); + } else { + writeCharHelper(c, output); + } +} + template -size_t Escaper::determineOutputBufferSize(std::string const& message) const { +size_t Escaper::determineOutputBufferSize( + std::string const& message) const { return message.size() * std::max(this->_controlHandler.maxCharLength(), this->_unicodeHandler.maxCharLength()); } template -void Escaper::writeIntoOutputBuffer(std::string const& message, char*& buffer) { +void Escaper::writeIntoOutputBuffer( + std::string const& message, char*& buffer) { unsigned char const* p = reinterpret_cast(message.data()); unsigned char const* end = p + message.length(); while (p < end) { @@ -125,38 +133,39 @@ void Escaper::writeIntoOutputBuffer(std: } uint8_t d = (uint8_t) * (p + 1); if ((d & 0xC0) == 0x80) { - this->_unicodeHandler.writeCharIntoOutputBuffer(((c & 0x1F) << 6) | (d & 0x3F), buffer, 2); + this->_unicodeHandler.writeCharIntoOutputBuffer(((c & 0x1F) << 6) | (d & 0x3F), + buffer, 2); ++p; } else { *buffer++ = '?'; - break; } p++; - } else if (c < 240) { + } else if (c < 240) { if ((p + 2) >= end) { *buffer++ = '?'; - break; - } + p++; + continue; + } uint8_t d = (uint8_t) * (p + 1); if ((d & 0xC0) == 0x80) { ++p; uint8_t e = (uint8_t) * (p + 1); if ((e & 0xC0) == 0x80) { ++p; - this->_unicodeHandler.writeCharIntoOutputBuffer(((c & 0x0F) << 12) | ((d & 0x3F) << 6) | (e & 0x3F), buffer, 3); - } else { + this->_unicodeHandler.writeCharIntoOutputBuffer( + ((c & 0x0F) << 12) | ((d & 0x3F) << 6) | (e & 0x3F), buffer, 3); + } else { *buffer++ = '?'; - // break; } } else { *buffer++ = '?'; - // break; } p++; } else if (c < 248) { if ((p + 3) >= end) { *buffer++ = '?'; - break; + p++; + continue; } uint8_t d = (uint8_t) * (p + 1); if ((d & 0xC0) == 0x80) { @@ -165,21 +174,21 @@ void Escaper::writeIntoOutputBuffer(std: if ((e & 0xC0) == 0x80) { ++p; uint8_t f = (uint8_t) * (p + 1); - if((f & 0xC0) == 0x80) { + if ((f & 0xC0) == 0x80) { p++; - this->_unicodeHandler.writeCharIntoOutputBuffer( - ((c & 0x07) << 18) | ((d & 0x3F) << 12) | ((e & 0x3F) << 6) | (f & 0x3F), buffer, 4); - } else { + this->_unicodeHandler.writeCharIntoOutputBuffer(((c & 0x07) << 18) | + ((d & 0x3F) << 12) | + ((e & 0x3F) << 6) | + (f & 0x3F), + buffer, 4); + } else { *buffer++ = '?'; - // break; } } else { *buffer++ = '?'; - //break; } } else { *buffer++ = '?'; - // break; } p++; } else { diff --git a/lib/Logger/Escaper.h b/lib/Logger/Escaper.h index 4481697efbc1..9e9783468025 100644 --- a/lib/Logger/Escaper.h +++ b/lib/Logger/Escaper.h @@ -44,6 +44,7 @@ struct UnicodeCharsRetainer { //worst case 4 digits struct UnicodeCharsEscaper { //\u +4 digits size_t maxCharLength() const { return 6; } void writeCharIntoOutputBuffer(uint32_t c, char*& output, int numBytes); + void writeCharHelper(uint16_t c, char*& output); }; class GeneralEscaper { diff --git a/tests/Logger/EscaperTest.cpp b/tests/Logger/EscaperTest.cpp index 577c765835d5..64c569c9cf9a 100644 --- a/tests/Logger/EscaperTest.cpp +++ b/tests/Logger/EscaperTest.cpp @@ -27,13 +27,9 @@ #include "gtest/gtest.h" #include "Logger/Escaper.h" -#include "Logger/Logger.h" -#include "Logger/LogMacros.h" -#include -#include -#include #include +#include #ifdef TRI_HAVE_UNISTD_H #include @@ -45,26 +41,22 @@ using namespace arangodb; // --SECTION-- test suite // ----------------------------------------------------------------------------- -template -void verifyExpectedValues(std::string const& inputString, std::string const& expectedOutput, size_t expectedSize, EscaperType& escaper) { - //LOG_DEVEL << "verifyExpectedValues"; +template +void verifyExpectedValues(std::string const& inputString, std::string const& expectedOutput, + size_t expectedSize, EscaperType& escaper) { size_t messageSize = escaper.determineOutputBufferSize(inputString); - LOG_DEVEL << "size " << messageSize; EXPECT_EQ(messageSize, expectedSize); auto buffer = std::make_unique(messageSize); char* output = buffer.get(); escaper.writeIntoOutputBuffer(inputString, output); size_t outputBufferSize = output - buffer.get(); -// LOG_DEVEL << "output size " << outputBufferSize; std::string outputString(buffer.get(), outputBufferSize); -// LOG_DEVEL << "output " << outputString << " " << outputString.size(); EXPECT_EQ(outputString.compare(expectedOutput), 0); EXPECT_EQ(outputString, expectedOutput); } TEST(EscaperTest, test_suppress_control_retain_unicode) { Escaper escaper; - // LOG_DEVEL << "SuppressControlRetainUnicode"; verifyExpectedValues("€", "€", 12, escaper); verifyExpectedValues(" € ", " € ", 24, escaper); verifyExpectedValues("mötör", "mötör", 28, escaper); @@ -75,13 +67,18 @@ TEST(EscaperTest, test_suppress_control_retain_unicode) { verifyExpectedValues("犬\r", "犬 ", 16, escaper); verifyExpectedValues("", "", 0, escaper); verifyExpectedValues("a", "a", 4, escaper); - std::string validUnicode = "€"; + verifyExpectedValues("𐍈", "𐍈", 16, escaper); //\uD800\uDF48 + verifyExpectedValues("𐍈 ", "𐍈 ", 20, escaper); //\uD800\uDF48 + std::string validUnicode = "€"; verifyExpectedValues(validUnicode.substr(0, 1), "?", 4, escaper); verifyExpectedValues(validUnicode.substr(0, 1) + "\n", "? ", 8, escaper); verifyExpectedValues("\x07", " ", 4, escaper); - verifyExpectedValues(std::string("\0", 1), " ", 4, escaper); - //invalid unicode: '\ufffe', '\U110000','\ud800', 'test\xFE' - } + verifyExpectedValues(std::string("\0", 1), " ", 4, escaper); + validUnicode = "𐍈"; + verifyExpectedValues(validUnicode.substr(0, 1), "?", 4, escaper); + verifyExpectedValues(validUnicode.substr(0, 1) + "\n", "? ", 8, escaper); + // invalid unicode: '\ufffe', '\U110000','\ud800', 'test\xFE' +} TEST(EscaperTest, test_suppress_control_escape_unicode) { Escaper escaper; @@ -95,12 +92,16 @@ TEST(EscaperTest, test_suppress_control_escape_unicode) { verifyExpectedValues("犬\r", "\\u72AC ", 24, escaper); verifyExpectedValues("", "", 0, escaper); verifyExpectedValues("a", "a", 6, escaper); - std::string validUnicode = "€"; + verifyExpectedValues("𐍈", "\\uD800\\uDF48", 24, escaper); //\uD800\uDF48 + verifyExpectedValues("𐍈 ", "\\uD800\\uDF48 ", 30, escaper); //\uD800\uDF48 + std::string validUnicode = "€"; + verifyExpectedValues(validUnicode.substr(0, 1), "?", 6, escaper); + verifyExpectedValues(validUnicode.substr(0, 1) + "\n", "? ", 12, escaper); + validUnicode = "𐍈"; verifyExpectedValues(validUnicode.substr(0, 1), "?", 6, escaper); verifyExpectedValues(validUnicode.substr(0, 1) + "\n", "? ", 12, escaper); verifyExpectedValues("\x07", " ", 6, escaper); - verifyExpectedValues(std::string("\0", 1), " ", 6, escaper); - + verifyExpectedValues(std::string("\0", 1), " ", 6, escaper); } TEST(EscaperTest, test_escape_control_retain_unicode) { Escaper escaper; @@ -114,11 +115,16 @@ TEST(EscaperTest, test_escape_control_retain_unicode) { verifyExpectedValues("犬\r", "犬\\r", 16, escaper); verifyExpectedValues("", "", 0, escaper); verifyExpectedValues("a", "a", 4, escaper); - std::string validUnicode = "€"; + verifyExpectedValues("𐍈", "𐍈", 16, escaper); //\uD800\uDF48 + verifyExpectedValues("𐍈 ", "𐍈 ", 20, escaper); //\uD800\uDF48 + std::string validUnicode = "€"; + verifyExpectedValues(validUnicode.substr(0, 1), "?", 4, escaper); + verifyExpectedValues(validUnicode.substr(0, 1) + "\n", "?\\n", 8, escaper); + validUnicode = "𐍈"; verifyExpectedValues(validUnicode.substr(0, 1), "?", 4, escaper); verifyExpectedValues(validUnicode.substr(0, 1) + "\n", "?\\n", 8, escaper); verifyExpectedValues("\x07", "\\x07", 4, escaper); - verifyExpectedValues(std::string("\0", 1), "\\x00", 4, escaper); + verifyExpectedValues(std::string("\0", 1), "\\x00", 4, escaper); } TEST(EscaperTest, test_escape_control_escape_unicode) { Escaper escaper; @@ -132,11 +138,14 @@ TEST(EscaperTest, test_escape_control_escape_unicode) { verifyExpectedValues("犬\r", "\\u72AC\\r", 24, escaper); verifyExpectedValues("", "", 0, escaper); verifyExpectedValues("a", "a", 6, escaper); - std::string validUnicode = "€"; + verifyExpectedValues("𐍈", "\\uD800\\uDF48", 24, escaper); //\uD800\uDF48 + verifyExpectedValues("𐍈 ", "\\uD800\\uDF48 ", 30, escaper); //\uD800\uDF48 + std::string validUnicode = "€"; + verifyExpectedValues(validUnicode.substr(0, 1), "?", 6, escaper); + verifyExpectedValues(validUnicode.substr(0, 1) + "\n", "?\\n", 12, escaper); + validUnicode = "𐍈"; verifyExpectedValues(validUnicode.substr(0, 1), "?", 6, escaper); verifyExpectedValues(validUnicode.substr(0, 1) + "\n", "?\\n", 12, escaper); verifyExpectedValues("\x07", "\\x07", 6, escaper); - verifyExpectedValues(std::string("\0", 1), "\\x00", 6, escaper); + verifyExpectedValues(std::string("\0", 1), "\\x00", 6, escaper); } - - From f48f87a225296eda836e4db5f0f8dfe27d95254c Mon Sep 17 00:00:00 2001 From: cpjulia Date: Mon, 20 Sep 2021 15:16:30 -0300 Subject: [PATCH 03/18] Added more tests --- lib/Logger/Escaper.cpp | 60 +++++++++++++++++++----------------- tests/Logger/EscaperTest.cpp | 37 +++++++++++++++++++--- 2 files changed, 63 insertions(+), 34 deletions(-) diff --git a/lib/Logger/Escaper.cpp b/lib/Logger/Escaper.cpp index cac890b02114..038247cb9891 100644 --- a/lib/Logger/Escaper.cpp +++ b/lib/Logger/Escaper.cpp @@ -23,6 +23,7 @@ //////////////////////////////////////////////////////////////////////////////// #include "Escaper.h" +#include "Basics/debugging.h" namespace arangodb { @@ -93,7 +94,8 @@ void UnicodeCharsEscaper::writeCharHelper(uint16_t c, char*& output) { } void UnicodeCharsEscaper::writeCharIntoOutputBuffer(uint32_t c, char*& output, int numBytes) { - if (numBytes == 4) { + if (numBytes == 4) { // when the unicode requires 4 bytes for representation, its code is escaped with surrogate pairs, the highest and the lowest bytes of the character + TRI_ASSERT(c >= 0x10000U); c -= 0x10000U; uint16_t high = (uint16_t) (((c & 0xffc00U) >> 10) + 0xd800); writeCharHelper(high, output); @@ -118,80 +120,80 @@ void Escaper::writeIntoOutputBuffer( unsigned char const* end = p + message.length(); while (p < end) { unsigned char c = *p; - if (c < 128) { - if (c < 0x20) { - this->_controlHandler.writeCharIntoOutputBuffer(c, buffer, 1); - } else { + if (c < 128) { // the character is ASCII + if (c < 0x20 || c == 0x7f) { // the character is either control, which comprises codes until 32, or is DEL, which is not a visible character + this->_controlHandler.writeCharIntoOutputBuffer(c, buffer, 1); //retain or escape the control character + } else { // is a visible ascii character *buffer++ = c; } - // single byte p++; - } else if (c < 224) { - if ((p + 1) >= end) { + } else if (c < 224) { // unicode which requires 2 bytes for representation + if ((p + 1) >= end) { // no next byte to represent it, so it's broken unicode *buffer++ = '?'; - break; + p++; + continue; } uint8_t d = (uint8_t) * (p + 1); - if ((d & 0xC0) == 0x80) { + if ((d & 0xC0) == 0x80) { // is within the rules for representing unicode characters for the second byte this->_unicodeHandler.writeCharIntoOutputBuffer(((c & 0x1F) << 6) | (d & 0x3F), - buffer, 2); + buffer, 2); // retain or escape the unicode character represented by 2 bytes ++p; - } else { + } else { // the next byte is broken unicode *buffer++ = '?'; } p++; - } else if (c < 240) { - if ((p + 2) >= end) { + } else if (c < 240) { // unicode which requires 3 bytes for representation + if ((p + 2) >= end) { // there's no 2 other sequential bytes to represent the unicode character, so it's broken unicode *buffer++ = '?'; p++; continue; } uint8_t d = (uint8_t) * (p + 1); - if ((d & 0xC0) == 0x80) { + if ((d & 0xC0) == 0x80) { // second byte is within the rules for representing a unicode character that requires 3 bytes for representation ++p; uint8_t e = (uint8_t) * (p + 1); - if ((e & 0xC0) == 0x80) { + if ((e & 0xC0) == 0x80) { // third byte is within the rules for representing a unicode character that requires 3 bytes for representation ++p; this->_unicodeHandler.writeCharIntoOutputBuffer( - ((c & 0x0F) << 12) | ((d & 0x3F) << 6) | (e & 0x3F), buffer, 3); - } else { + ((c & 0x0F) << 12) | ((d & 0x3F) << 6) | (e & 0x3F), buffer, 3); // retain or escape the unicode character represented by 3 bytes + } else { // second byte is not within the rules for representing a unicode character *buffer++ = '?'; } - } else { + } else { // third byte is not within the rules for representing a unicode character *buffer++ = '?'; } p++; - } else if (c < 248) { - if ((p + 3) >= end) { + } else if (c < 248) { // unicode which requires 4 bytes for representation + if ((p + 3) >= end) { // there's not 3 sequential bytes for representing this unicode character, so it's broken unicode *buffer++ = '?'; p++; continue; } uint8_t d = (uint8_t) * (p + 1); - if ((d & 0xC0) == 0x80) { + if ((d & 0xC0) == 0x80) { // second byte is within the rules for representing a unicode character that requires 3 bytes for representation ++p; uint8_t e = (uint8_t) * (p + 1); - if ((e & 0xC0) == 0x80) { + if ((e & 0xC0) == 0x80) { // third byte is within the rules for representing a unicode character that requires 3 bytes for representation ++p; uint8_t f = (uint8_t) * (p + 1); - if ((f & 0xC0) == 0x80) { + if ((f & 0xC0) == 0x80) { // fourth byte is within the rules for representing a unicode character that requires 3 bytes for representation p++; this->_unicodeHandler.writeCharIntoOutputBuffer(((c & 0x07) << 18) | ((d & 0x3F) << 12) | ((e & 0x3F) << 6) | (f & 0x3F), - buffer, 4); - } else { + buffer, 4); // retain or escape the unicode character represented by 4 bytes + } else { // second byte is not within the rules for representing a unicode character *buffer++ = '?'; } - } else { + } else { // third byte is not within the rules for representing a unicode character *buffer++ = '?'; } - } else { + } else { // fourth byte is not within the rules for representing a unicode character *buffer++ = '?'; } p++; - } else { + } else { // broken unicode, is not ascii and not represented with 2, 3 or 4 bytes *buffer++ = '?'; // invalid UTF-8 sequence break; diff --git a/tests/Logger/EscaperTest.cpp b/tests/Logger/EscaperTest.cpp index 64c569c9cf9a..a3a06874c8de 100644 --- a/tests/Logger/EscaperTest.cpp +++ b/tests/Logger/EscaperTest.cpp @@ -28,6 +28,8 @@ #include "Logger/Escaper.h" +#include "Logger/LogMacros.h" + #include #include @@ -41,6 +43,22 @@ using namespace arangodb; // --SECTION-- test suite // ----------------------------------------------------------------------------- +class EscaperTest : public ::testing::Test { + protected: + std::string asciiVisibleChars; + std::string bigString; + + EscaperTest() { + for (int i = 33; i <= 126; ++i) { + asciiVisibleChars += i; + } + while (bigString.size() < 1000) { + bigString += asciiVisibleChars; + } + } + +}; + template void verifyExpectedValues(std::string const& inputString, std::string const& expectedOutput, size_t expectedSize, EscaperType& escaper) { @@ -55,8 +73,10 @@ void verifyExpectedValues(std::string const& inputString, std::string const& exp EXPECT_EQ(outputString, expectedOutput); } -TEST(EscaperTest, test_suppress_control_retain_unicode) { +TEST_F(EscaperTest, test_suppress_control_retain_unicode) { Escaper escaper; + verifyExpectedValues(asciiVisibleChars, asciiVisibleChars, asciiVisibleChars.size()*4, escaper); + verifyExpectedValues(bigString, bigString, bigString.size()*4, escaper); verifyExpectedValues("€", "€", 12, escaper); verifyExpectedValues(" € ", " € ", 24, escaper); verifyExpectedValues("mötör", "mötör", 28, escaper); @@ -77,11 +97,12 @@ TEST(EscaperTest, test_suppress_control_retain_unicode) { validUnicode = "𐍈"; verifyExpectedValues(validUnicode.substr(0, 1), "?", 4, escaper); verifyExpectedValues(validUnicode.substr(0, 1) + "\n", "? ", 8, escaper); - // invalid unicode: '\ufffe', '\U110000','\ud800', 'test\xFE' } -TEST(EscaperTest, test_suppress_control_escape_unicode) { +TEST_F(EscaperTest, test_suppress_control_escape_unicode) { Escaper escaper; + verifyExpectedValues(asciiVisibleChars, asciiVisibleChars, asciiVisibleChars.size()*6, escaper); + verifyExpectedValues(bigString, bigString, bigString.size()*6, escaper); verifyExpectedValues("€", "\\u20AC", 18, escaper); verifyExpectedValues(" € ", " \\u20AC ", 36, escaper); verifyExpectedValues("mötör", "m\\u00F6t\\u00F6r", 42, escaper); @@ -103,8 +124,11 @@ TEST(EscaperTest, test_suppress_control_escape_unicode) { verifyExpectedValues("\x07", " ", 6, escaper); verifyExpectedValues(std::string("\0", 1), " ", 6, escaper); } -TEST(EscaperTest, test_escape_control_retain_unicode) { + +TEST_F(EscaperTest, test_escape_control_retain_unicode) { Escaper escaper; + verifyExpectedValues(asciiVisibleChars, asciiVisibleChars, asciiVisibleChars.size()*4, escaper); + verifyExpectedValues(bigString, bigString, bigString.size()*4, escaper); verifyExpectedValues("€", "€", 12, escaper); verifyExpectedValues(" € ", " € ", 24, escaper); verifyExpectedValues("mötör", "mötör", 28, escaper); @@ -126,8 +150,11 @@ TEST(EscaperTest, test_escape_control_retain_unicode) { verifyExpectedValues("\x07", "\\x07", 4, escaper); verifyExpectedValues(std::string("\0", 1), "\\x00", 4, escaper); } -TEST(EscaperTest, test_escape_control_escape_unicode) { + +TEST_F(EscaperTest, test_escape_control_escape_unicode) { Escaper escaper; + verifyExpectedValues(asciiVisibleChars, asciiVisibleChars, asciiVisibleChars.size()*6, escaper); + verifyExpectedValues(bigString, bigString, bigString.size()*6, escaper); verifyExpectedValues("€", "\\u20AC", 18, escaper); verifyExpectedValues(" € ", " \\u20AC ", 36, escaper); verifyExpectedValues("mötör", "m\\u00F6t\\u00F6r", 42, escaper); From 4cc1e64ff62b3d74b1975452ee48b74962d5ca39 Mon Sep 17 00:00:00 2001 From: cpjulia Date: Mon, 20 Sep 2021 18:26:43 -0300 Subject: [PATCH 04/18] Removed unused functions, updated CHANGELOG, removed unused include in unit test --- CHANGELOG | 26 +++++++++++++++ lib/Basics/tri-strings.cpp | 62 ------------------------------------ lib/Basics/tri-strings.h | 18 ----------- tests/Logger/EscaperTest.cpp | 1 - 4 files changed, 26 insertions(+), 81 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 96cddf068bd9..f14f588f4a20 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,32 @@ devel ----- +* feature/escape-unicode-control-chars: the server now has two flags for retaining or escaping control and unicode +characters in the log. The flag `log.escape` is deprecated and, instead, +the new flags `--log.escape-control-chars` and `log.escape-unicode-chars` +should be used. + +- `--log.escape-control-chars`: this flag applies to the control characters, which have hex code below `\x20`, and also the character DEL, with hex code of `\x7f`. When its value is set to false, the control +character will be retained, and its actual value will be displayed when it +is a visible character, or a space ` ` character will be displayed if it is +not a visible character. The same will happen to `DEL` character (code `\xF7`), +even though it is not a control character, because it is not visible. For example, +control characer `\n` is visible, so a `\n` will be displayed in the log, and +control character `BEL` is not visible, so a space ` ` would be displayed. +When its value is set to true, the hex code for the character is displayed, for +example, `BEL` character would be displayed as its hex code, `\x07`. + The default value for this flag is `true` for compatibility with +previous versions. + +- `--log.escape-unicode-chars`: when its value is set to false, the unicode character +will be retained, and its actual value will be displayed. For example, `犬` will +be displayed as `犬`. When its value is set to true, the character is escaped, and +the hex code for the character is displayed. For example, `犬` would be displayed +as its hex code, `\u72AC`. + The default value for this flag is set to `false` for compatibility with +previous versions. + + * APM-60: optionally allow special characters and Unicode characters in database names. diff --git a/lib/Basics/tri-strings.cpp b/lib/Basics/tri-strings.cpp index d24c67c59c44..454c55bc678a 100644 --- a/lib/Basics/tri-strings.cpp +++ b/lib/Basics/tri-strings.cpp @@ -303,68 +303,6 @@ char* TRI_SHA256String(char const* source, size_t sourceLen, size_t* dstLen) { return (char*)dst; } -//////////////////////////////////////////////////////////////////////////////// -/// @brief escapes special characters using C escapes -/// the target buffer must have been allocated already and big enough to hold -/// the result of at most (4 * inLength) + 2 bytes! -//////////////////////////////////////////////////////////////////////////////// - -char* TRI_EscapeControlsCString(char const* in, size_t inLength, char* out, - size_t* outLength, bool appendNewline) { - if (out == nullptr) { - return nullptr; - } - - char* qtr = out; - char const* ptr; - char const* end; - - for (ptr = in, end = ptr + inLength; ptr < end; ptr++, qtr++) { - uint8_t n; - - switch (*ptr) { - case '\n': - *qtr++ = '\\'; - *qtr = 'n'; - break; - - case '\r': - *qtr++ = '\\'; - *qtr = 'r'; - break; - - case '\t': - *qtr++ = '\\'; - *qtr = 't'; - break; - - default: - n = (uint8_t)(*ptr); - - if (n < 32) { - uint8_t n1 = n >> 4; - uint8_t n2 = n & 0x0F; - - *qtr++ = '\\'; - *qtr++ = 'x'; - *qtr++ = (n1 < 10) ? ('0' + n1) : ('A' + n1 - 10); - *qtr = (n2 < 10) ? ('0' + n2) : ('A' + n2 - 10); - } else { - *qtr = *ptr; - } - - break; - } - } - - if (appendNewline) { - *qtr++ = '\n'; - } - - *qtr = '\0'; - *outLength = static_cast(qtr - out); - return out; -} //////////////////////////////////////////////////////////////////////////////// /// @brief unescapes unicode escape sequences diff --git a/lib/Basics/tri-strings.h b/lib/Basics/tri-strings.h index e065a5f57639..24cf82470a78 100644 --- a/lib/Basics/tri-strings.h +++ b/lib/Basics/tri-strings.h @@ -109,24 +109,6 @@ void TRI_FreeString(char*) noexcept; char* TRI_SHA256String(char const* source, size_t sourceLen, size_t* dstLen); -//////////////////////////////////////////////////////////////////////////////// -/// @brief returns the maximum result length for an escaped string -/// (4 * inLength) + 2 bytes! -//////////////////////////////////////////////////////////////////////////////// - -constexpr size_t TRI_MaxLengthEscapeControlsCString(size_t inLength) { - return (4 * inLength) + 2; // for newline and 0 byte -} - -//////////////////////////////////////////////////////////////////////////////// -/// @brief escapes special characters using C escapes -/// the target buffer must have been allocated already and big enough to hold -/// the result of at most (4 * inLength) + 2 bytes! -//////////////////////////////////////////////////////////////////////////////// - -char* TRI_EscapeControlsCString(char const* in, size_t inLength, char* out, - size_t* outLength, bool appendNewline); - //////////////////////////////////////////////////////////////////////////////// /// @brief unescapes unicode escape sequences /// diff --git a/tests/Logger/EscaperTest.cpp b/tests/Logger/EscaperTest.cpp index a3a06874c8de..3bb4ec153a19 100644 --- a/tests/Logger/EscaperTest.cpp +++ b/tests/Logger/EscaperTest.cpp @@ -28,7 +28,6 @@ #include "Logger/Escaper.h" -#include "Logger/LogMacros.h" #include #include From 3c293659e7114197837f3e09743b02d8ae0c15fc Mon Sep 17 00:00:00 2001 From: cpjulia Date: Mon, 20 Sep 2021 19:08:31 -0300 Subject: [PATCH 05/18] Update tests/Logger/EscaperTest.cpp Co-authored-by: Jan --- tests/Logger/EscaperTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Logger/EscaperTest.cpp b/tests/Logger/EscaperTest.cpp index 3bb4ec153a19..56e36bb101cb 100644 --- a/tests/Logger/EscaperTest.cpp +++ b/tests/Logger/EscaperTest.cpp @@ -18,7 +18,7 @@ /// /// Copyright holder is ArangoDB GmbH, Cologne, Germany /// -/// @author Jan Steemann +/// @author Julia Puget /// @author Copyright 2015, ArangoDB GmbH, Cologne, Germany //////////////////////////////////////////////////////////////////////////////// From bfa05eb3add7f763741254036fae17fc76de774c Mon Sep 17 00:00:00 2001 From: cpjulia Date: Mon, 20 Sep 2021 19:09:53 -0300 Subject: [PATCH 06/18] Update lib/Logger/LoggerFeature.cpp Co-authored-by: Jan --- lib/Logger/LoggerFeature.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/Logger/LoggerFeature.cpp b/lib/Logger/LoggerFeature.cpp index 9a2db515172f..7ec6d3a734f7 100644 --- a/lib/Logger/LoggerFeature.cpp +++ b/lib/Logger/LoggerFeature.cpp @@ -398,7 +398,6 @@ void LoggerFeature::prepare() { Logger::setShowRole(_showRole); Logger::setUseColor(_useColor); Logger::setTimeFormat(LogTimeFormats::formatFromName(_timeFormatString)); - //Logger::setUseEscaped(_useEscaped); Logger::setUseControlEscaped(_useControlEscaped); Logger::setUseUnicodeEscaped(_useUnicodeEscaped); Logger::setShowLineNumber(_lineNumber); From 0ca0d25c2677bc6addf6b9aeddac89af6b2f0fca Mon Sep 17 00:00:00 2001 From: cpjulia Date: Mon, 20 Sep 2021 19:10:06 -0300 Subject: [PATCH 07/18] Update lib/Logger/LoggerFeature.h Co-authored-by: Jan --- lib/Logger/LoggerFeature.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Logger/LoggerFeature.h b/lib/Logger/LoggerFeature.h index 34a888024f05..ed807abf2435 100644 --- a/lib/Logger/LoggerFeature.h +++ b/lib/Logger/LoggerFeature.h @@ -71,7 +71,7 @@ class LoggerFeature final : public application_features::ApplicationFeature { bool _useLocalTime = false; bool _useColor = true; bool _useControlEscaped = true; - bool _useUnicodeEscaped = true; + bool _useUnicodeEscaped = false; bool _lineNumber = false; bool _shortenFilenames = true; bool _processId = true; From 39f97e1fc04f9fc98f7d1fc2662dd8acade0dc78 Mon Sep 17 00:00:00 2001 From: cpjulia Date: Mon, 20 Sep 2021 19:10:18 -0300 Subject: [PATCH 08/18] Update lib/Logger/Escaper.h Co-authored-by: Jan --- lib/Logger/Escaper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Logger/Escaper.h b/lib/Logger/Escaper.h index 9e9783468025..1b2e8c2bb5f4 100644 --- a/lib/Logger/Escaper.h +++ b/lib/Logger/Escaper.h @@ -18,7 +18,7 @@ /// /// Copyright holder is ArangoDB GmbH, Cologne, Germany /// -/// @author Dr. Frank Celler +/// @author Julia Puget //////////////////////////////////////////////////////////////////////////////// From 7a3480f4114b8c8a2c166efceb76148d0666eea0 Mon Sep 17 00:00:00 2001 From: cpjulia Date: Mon, 20 Sep 2021 19:10:24 -0300 Subject: [PATCH 09/18] Update lib/Logger/Escaper.cpp Co-authored-by: Jan --- lib/Logger/Escaper.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/Logger/Escaper.cpp b/lib/Logger/Escaper.cpp index 038247cb9891..12b5b9987c27 100644 --- a/lib/Logger/Escaper.cpp +++ b/lib/Logger/Escaper.cpp @@ -18,8 +18,7 @@ /// /// Copyright holder is ArangoDB GmbH, Cologne, Germany /// -/// @author Achim Brandt -/// @author Dr. Frank Celler +/// @author Julia Puget //////////////////////////////////////////////////////////////////////////////// #include "Escaper.h" From 963921b423818762891b55de91055aa7e830eebc Mon Sep 17 00:00:00 2001 From: cpjulia Date: Mon, 20 Sep 2021 19:10:38 -0300 Subject: [PATCH 10/18] Update CHANGELOG Co-authored-by: Jan --- CHANGELOG | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index fff95d65afdb..87bbf9ffce17 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -3,7 +3,7 @@ devel * feature/escape-unicode-control-chars: the server now has two flags for retaining or escaping control and unicode characters in the log. The flag `log.escape` is deprecated and, instead, -the new flags `--log.escape-control-chars` and `log.escape-unicode-chars` +the new flags `--log.escape-control-chars` and `--log.escape-unicode-chars` should be used. - `--log.escape-control-chars`: this flag applies to the control characters, which have hex code below `\x20`, and also the character DEL, with hex code of `\x7f`. When its value is set to false, the control From 015a188bd190f5adce54401257cfe827d7e178b2 Mon Sep 17 00:00:00 2001 From: cpjulia Date: Tue, 21 Sep 2021 09:53:47 -0300 Subject: [PATCH 11/18] Update CHANGELOG Co-authored-by: Jan --- CHANGELOG | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index 87bbf9ffce17..576e16f02b51 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,7 +2,7 @@ devel ----- * feature/escape-unicode-control-chars: the server now has two flags for retaining or escaping control and unicode -characters in the log. The flag `log.escape` is deprecated and, instead, +characters in the log. The flag `--log.escape` is now deprecated and, instead, the new flags `--log.escape-control-chars` and `--log.escape-unicode-chars` should be used. From a0079268cb14b9438c1dab2ed6dea09abcd5f14c Mon Sep 17 00:00:00 2001 From: cpjulia Date: Tue, 21 Sep 2021 09:54:07 -0300 Subject: [PATCH 12/18] Update CHANGELOG Co-authored-by: Jan --- CHANGELOG | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index 576e16f02b51..3ae05d2186e9 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,7 +1,7 @@ devel ----- -* feature/escape-unicode-control-chars: the server now has two flags for retaining or escaping control and unicode +* The server now has two flags to control the escaping control and Unicode characters in the log. The flag `--log.escape` is now deprecated and, instead, the new flags `--log.escape-control-chars` and `--log.escape-unicode-chars` should be used. From 2f8f3a2377710d0971d7d9f26235dc8e5c18c16e Mon Sep 17 00:00:00 2001 From: cpjulia Date: Tue, 21 Sep 2021 09:59:08 -0300 Subject: [PATCH 13/18] Updated CHANGELOG --- CHANGELOG | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index fff95d65afdb..1593eb3c3327 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,16 +1,18 @@ devel ----- -* feature/escape-unicode-control-chars: the server now has two flags for retaining or escaping control and unicode -characters in the log. The flag `log.escape` is deprecated and, instead, -the new flags `--log.escape-control-chars` and `log.escape-unicode-chars` -should be used. - -- `--log.escape-control-chars`: this flag applies to the control characters, which have hex code below `\x20`, and also the character DEL, with hex code of `\x7f`. When its value is set to false, the control -character will be retained, and its actual value will be displayed when it -is a visible character, or a space ` ` character will be displayed if it is -not a visible character. The same will happen to `DEL` character (code `\xF7`), -even though it is not a control character, because it is not visible. For example, +* feature/escape-unicode-control-chars: the server now has two flags for +retaining or escaping control and unicode characters in the log. The flag +`log.escape` is deprecated and, instead, the new flags +`--log.escape-control-chars` and `log.escape-unicode-chars` should be used. + +- `--log.escape-control-chars`: this flag applies to the control characters, +which have hex code below `\x20`, and also the character DEL, with hex code +of `\x7f`. When its value is set to false, the control character will be +retained, and its actual value will be displayed when it is a visible +character, or a space ` ` character will be displayed if it is not a visible +character. The same will happen to `DEL` character (code `\xF7`), even though +it is not a control character, because it is not visible. For example, control characer `\n` is visible, so a `\n` will be displayed in the log, and control character `BEL` is not visible, so a space ` ` would be displayed. When its value is set to true, the hex code for the character is displayed, for @@ -18,12 +20,12 @@ example, `BEL` character would be displayed as its hex code, `\x07`. The default value for this flag is `true` for compatibility with previous versions. -- `--log.escape-unicode-chars`: when its value is set to false, the unicode character -will be retained, and its actual value will be displayed. For example, `犬` will -be displayed as `犬`. When its value is set to true, the character is escaped, and -the hex code for the character is displayed. For example, `犬` would be displayed -as its hex code, `\u72AC`. - The default value for this flag is set to `false` for compatibility with +- `--log.escape-unicode-chars`: when its value is set to false, the unicode +character will be retained, and its actual value will be displayed. For +example, `犬` will be displayed as `犬`. When its value is set to true, the +character is escaped, and the hex code for the character is displayed. For +example, `犬` would be displayed as its hex code, `\u72AC`. + The default value for this flag is `false` for compatibility with previous versions. * Added REST API endpoint `/_admin/debug/failat/all` to retrieve the list From 482e186563120fe95267a16f52dda0df6b9946de Mon Sep 17 00:00:00 2001 From: cpjulia Date: Wed, 22 Sep 2021 10:16:02 -0300 Subject: [PATCH 14/18] Update tests/Logger/EscaperTest.cpp Co-authored-by: Jan --- tests/Logger/EscaperTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Logger/EscaperTest.cpp b/tests/Logger/EscaperTest.cpp index 344652c28a26..fa88b78af86b 100644 --- a/tests/Logger/EscaperTest.cpp +++ b/tests/Logger/EscaperTest.cpp @@ -80,7 +80,7 @@ TEST_F(EscaperTest, test_suppress_control_retain_unicode) { Escaper escaper; verifyExpectedValues(asciiVisibleChars, asciiVisibleChars, asciiVisibleChars.size()*4, escaper); verifyExpectedValues(bigString, bigString, bigString.size()*4, escaper); -verifyExpectedValues(controlChars, " ", + verifyExpectedValues(controlChars, " ", controlChars.size()*4, escaper); verifyExpectedValues("€", "€", 12, escaper); verifyExpectedValues(" € ", " € ", 24, escaper); From 0ea373f963f84694f83a9ac1685fb1d901f127c1 Mon Sep 17 00:00:00 2001 From: cpjulia Date: Wed, 22 Sep 2021 10:16:16 -0300 Subject: [PATCH 15/18] Update tests/Logger/EscaperTest.cpp Co-authored-by: Jan --- tests/Logger/EscaperTest.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/Logger/EscaperTest.cpp b/tests/Logger/EscaperTest.cpp index fa88b78af86b..9a17e67bdce3 100644 --- a/tests/Logger/EscaperTest.cpp +++ b/tests/Logger/EscaperTest.cpp @@ -28,14 +28,8 @@ #include "Logger/Escaper.h" - -#include #include -#ifdef TRI_HAVE_UNISTD_H -#include -#endif - using namespace arangodb; // ----------------------------------------------------------------------------- From c9ad897dd39d094110b51e00f447ba21522753e2 Mon Sep 17 00:00:00 2001 From: cpjulia Date: Wed, 22 Sep 2021 10:17:05 -0300 Subject: [PATCH 16/18] Update CHANGELOG Co-authored-by: Jan --- CHANGELOG | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index d0c5409f60b2..9a09c7d5bbf6 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -7,19 +7,19 @@ the new flags `--log.escape-control-chars` and `--log.escape-unicode-chars` should be used. - `--log.escape-control-chars`: this flag applies to the control characters, -which have hex code below `\x20`, and also the character DEL, with hex code of -`\x7f`. When its value is set to false, the control character will be retained, - and its actual value will be displayed when it is a visible character, or a -space ` ` character will be displayed if it is not a visible character. The -same will happen to `DEL` character (code `\xF7`), even though it is not a -control character, because it is not visible. For example, control characer -`\n` is visible, so a `\n` will be displayed in the log, and control character -`BEL` is not visible, so a space ` ` would be displayed. When its value is -set to true, the hex code for the character is displayed, for example, `BEL` -character would be displayed as its hex code, `\x07`. - The default value for this flag is `true` for compatibility with -previous versions. - + which have hex code below `\x20`, and also the character DEL, with hex code of + `\x7f`. When its value is set to false, the control character will be retained, + and its actual value will be displayed when it is a visible character, or a + space ` ` character will be displayed if it is not a visible character. The + same will happen to `DEL` character (code `\xF7`), even though it is not a + control character, because it is not visible. For example, control characer + `\n` is visible, so a `\n` will be displayed in the log, and control character + `BEL` is not visible, so a space ` ` would be displayed. When its value is + set to true, the hex code for the character is displayed, for example, `BEL` + character would be displayed as its hex code, `\x07`. + The default value for this flag is `true` for compatibility with + previous versions. + - `--log.escape-unicode-chars`: when its value is set to false, the unicode character will be retained, and its actual value will be displayed. For example, `犬` will be displayed as `犬`. When its value is set to true, the From 832f56901fa8190f3d5cf6d8679ff3593ea7d0f1 Mon Sep 17 00:00:00 2001 From: cpjulia Date: Wed, 22 Sep 2021 10:17:19 -0300 Subject: [PATCH 17/18] Update CHANGELOG Co-authored-by: Jan --- CHANGELOG | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 9a09c7d5bbf6..3a026f47131a 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,9 +2,9 @@ devel ----- * The server now has two flags to control the escaping control and Unicode -characters in the log. The flag `--log.escape` is now deprecated and, instead, -the new flags `--log.escape-control-chars` and `--log.escape-unicode-chars` -should be used. + characters in the log. The flag `--log.escape` is now deprecated and, instead, + the new flags `--log.escape-control-chars` and `--log.escape-unicode-chars` + should be used. - `--log.escape-control-chars`: this flag applies to the control characters, which have hex code below `\x20`, and also the character DEL, with hex code of From 7950a3fb1850b90697fb1bb51af91a4bd97709ea Mon Sep 17 00:00:00 2001 From: cpjulia Date: Wed, 22 Sep 2021 10:19:26 -0300 Subject: [PATCH 18/18] Update CHANGELOG Co-authored-by: Jan --- CHANGELOG | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 3a026f47131a..fc5be3320ab3 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -21,12 +21,12 @@ devel previous versions. - `--log.escape-unicode-chars`: when its value is set to false, the unicode -character will be retained, and its actual value will be displayed. For -example, `犬` will be displayed as `犬`. When its value is set to true, the -character is escaped, and the hex code for the character is displayed. For -example, `犬` would be displayed as its hex code, `\u72AC`. - The default value for this flag is `false` for compatibility with -previous versions. + character will be retained, and its actual value will be displayed. For + example, `犬` will be displayed as `犬`. When its value is set to true, the + character is escaped, and the hex code for the character is displayed. For + example, `犬` would be displayed as its hex code, `\u72AC`. + The default value for this flag is `false` for compatibility with + previous versions. * Added REST API endpoint `/_admin/debug/failat/all` to retrieve the list of currently enabled failure points. This API is available only if