From 2305da621eaf243ef5513da67ed74a960b117bb4 Mon Sep 17 00:00:00 2001 From: jsteemann Date: Tue, 3 Aug 2021 16:59:47 +0200 Subject: [PATCH 001/101] first steps - WIP --- arangod/Aql/Ast.cpp | 7 +- arangod/Aql/Collection.cpp | 18 +- arangod/Cluster/SynchronizeShard.cpp | 2 +- .../IResearch/IResearchAnalyzerFeature.cpp | 4 +- arangod/Indexes/Index.cpp | 122 +++++---- arangod/Indexes/Index.h | 18 +- arangod/Indexes/IndexFactory.cpp | 4 +- arangod/Replication/Syncer.cpp | 5 +- arangod/RestHandler/RestAnalyzerHandler.cpp | 8 +- arangod/RestHandler/RestDatabaseHandler.cpp | 9 +- arangod/RestServer/DatabaseFeature.cpp | 9 +- arangod/RestServer/DatabaseFeature.h | 3 + arangod/V8Server/v8-analyzers.cpp | 10 +- arangod/VocBase/LogicalCollection.cpp | 49 +++- arangod/VocBase/LogicalCollection.h | 9 +- arangod/VocBase/LogicalView.cpp | 52 +++- arangod/VocBase/LogicalView.h | 15 +- arangod/VocBase/Methods/Collections.cpp | 3 +- arangod/VocBase/Methods/Databases.cpp | 3 +- arangod/VocBase/Methods/Indexes.cpp | 53 ++-- arangod/VocBase/VocbaseInfo.cpp | 9 +- arangod/VocBase/vocbase.cpp | 100 ++++---- arangod/VocBase/vocbase.h | 10 +- arangosh/Utils/ClientManager.cpp | 5 +- arangosh/V8Client/ArangoClientHelper.cpp | 5 +- .../frontend/js/templates/databaseView.ejs | 4 +- lib/Basics/Utf8Helper.cpp | 16 ++ lib/Basics/Utf8Helper.h | 3 + lib/Rest/HttpRequest.cpp | 51 ++-- tests/js/common/shell/shell-object-names.js | 238 ++++++++++++++++++ 30 files changed, 644 insertions(+), 200 deletions(-) create mode 100644 tests/js/common/shell/shell-object-names.js diff --git a/arangod/Aql/Ast.cpp b/arangod/Aql/Ast.cpp index fcaf6cc9c0c3..c2f08bb109bc 100644 --- a/arangod/Aql/Ast.cpp +++ b/arangod/Aql/Ast.cpp @@ -50,6 +50,7 @@ #include "Cluster/ClusterInfo.h" #include "Containers/SmallVector.h" #include "Graph/Graph.h" +#include "RestServer/DatabaseFeature.h" #include "Transaction/Helpers.h" #include "Utils/CollectionNameResolver.h" #include "VocBase/LogicalCollection.h" @@ -4047,11 +4048,13 @@ AstNode* Ast::createNode(AstNodeType type) { /// in case validation fails, will throw an exception void Ast::validateDataSourceName(arangodb::velocypack::StringRef const& name, bool validateStrict) { + bool allowUnicode = _query.vocbase().server().getFeature().allowUnicodeNames(); + // common validation if (name.empty() || (validateStrict && - !TRI_vocbase_t::IsAllowedName( - true, arangodb::velocypack::StringRef(name.data(), name.size())))) { + !LogicalCollection::isAllowedName( + /*allowSystem*/ true, allowUnicode, name))) { // will throw std::string errorMessage(TRI_errno_string(TRI_ERROR_ARANGO_ILLEGAL_NAME)); errorMessage.append(": "); diff --git a/arangod/Aql/Collection.cpp b/arangod/Aql/Collection.cpp index e30d0bd39923..5552ceed94af 100644 --- a/arangod/Aql/Collection.cpp +++ b/arangod/Aql/Collection.cpp @@ -239,14 +239,16 @@ std::string const& Collection::name() const { // moved here from transaction::Methods::getIndexByIdentifier(..) std::shared_ptr Collection::indexByIdentifier(std::string const& idxId) const { - if (idxId.empty()) { - THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_BAD_PARAMETER, - "The index id cannot be empty."); - } - - if (!arangodb::Index::validateId(idxId.c_str())) { - THROW_ARANGO_EXCEPTION(TRI_ERROR_ARANGO_INDEX_HANDLE_BAD); - } + if (idxId.empty()) { + THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_BAD_PARAMETER, + "The index id cannot be empty."); + } + + for (std::size_t i = 0; i < idxId.size(); ++i) { + if (idxId[i] < '0' || idxId[i] > '9') { + THROW_ARANGO_EXCEPTION(TRI_ERROR_ARANGO_INDEX_HANDLE_BAD); + } + } auto iid = arangodb::IndexId{arangodb::basics::StringUtils::uint64(idxId)}; auto idx = this->getCollection()->lookupIndex(iid); diff --git a/arangod/Cluster/SynchronizeShard.cpp b/arangod/Cluster/SynchronizeShard.cpp index 5d621be29b55..c7c0a00ab4c2 100644 --- a/arangod/Cluster/SynchronizeShard.cpp +++ b/arangod/Cluster/SynchronizeShard.cpp @@ -1308,7 +1308,7 @@ Result SynchronizeShard::catchupWithExclusiveLock( options.timeout = network::Timeout(900.0); // this can be slow!!! options.skipScheduler = true; // hack to speed up future.get() - std::string const url = "/_api/collection/" + collection.name() + "/recalculateCount"; + std::string const url = "/_api/collection/" + StringUtils::urlEncode(collection.name()) + "/recalculateCount"; // send out the request auto future = network::sendRequest(pool, ep, fuerte::RestVerb::Put, diff --git a/arangod/IResearch/IResearchAnalyzerFeature.cpp b/arangod/IResearch/IResearchAnalyzerFeature.cpp index 425620dc238a..de44e2b8fccf 100644 --- a/arangod/IResearch/IResearchAnalyzerFeature.cpp +++ b/arangod/IResearch/IResearchAnalyzerFeature.cpp @@ -1520,7 +1520,7 @@ IResearchAnalyzerFeature::IResearchAnalyzerFeature(application_features::Applica // validate analyzer name auto split = splitAnalyzerName(name); - if (!TRI_vocbase_t::IsAllowedName(false, velocypack::StringRef(split.second.c_str(), split.second.size()))) { + if (!TRI_vocbase_t::isAllowedName(/*allowSystem*/ false, /*allowUnicode*/ false, velocypack::StringRef(split.second.c_str(), split.second.size()))) { return { TRI_ERROR_BAD_PARAMETER, std::string("invalid characters in analyzer name '") + std::string(split.second) + "'" @@ -1599,7 +1599,7 @@ Result IResearchAnalyzerFeature::emplaceAnalyzer( // emplace // validate analyzer name auto split = splitAnalyzerName(name); - if (!TRI_vocbase_t::IsAllowedName(false, velocypack::StringRef(split.second.c_str(), split.second.size()))) { + if (!TRI_vocbase_t::isAllowedName(/*allowSystem*/ false, /*allowUnicode*/ false, velocypack::StringRef(split.second.c_str(), split.second.size()))) { return { TRI_ERROR_BAD_PARAMETER, std::string("invalid characters in analyzer name '") + std::string(split.second) + "'" }; diff --git a/arangod/Indexes/Index.cpp b/arangod/Indexes/Index.cpp index 7e09789c0235..c5d884735a67 100644 --- a/arangod/Indexes/Index.cpp +++ b/arangod/Indexes/Index.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "Index.h" @@ -256,6 +257,44 @@ void Index::name(std::string const& newName) { } } +/// @brief checks if an index name is valid +/// returns true if the name is allowed and false otherwise +bool Index::isAllowedName(bool allowUnicode, + arangodb::velocypack::StringRef const& name) noexcept { + size_t length = 0; + + for (char const* ptr = name.data(); length < name.size(); ++ptr, ++length) { + bool ok = true; + + if (allowUnicode) { + // forward slashes are disallowed inside collection names + ok &= (*ptr != '/'); + + if (length == 0) { + // a collection name must not start with a digit, because then it can be confused with + // collection ids + ok &= (*ptr < '0' || *ptr > '9'); + } + } else { + if (length == 0) { + ok &= (*ptr >= 'a' && *ptr <= 'z') || (*ptr >= 'A' && *ptr <= 'Z'); + } else { + ok &= (*ptr >= 'a' && *ptr <= 'z') || (*ptr >= 'A' && *ptr <= 'Z') || + (*ptr == '_') || (*ptr == '-') || (*ptr >= '0' && *ptr <= '9'); + } + } + + if (!ok) { + return false; + } + } + + // collection names must be within the expected length limits + return (length > 0 && + length <= maxNameLength && + velocypack::Utf8Helper::isValidUtf8(reinterpret_cast(name.data()), name.size())); +} + size_t Index::sortWeight(arangodb::aql::AstNode const* node) { switch (node->type) { case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_EQ: @@ -381,68 +420,43 @@ char const* Index::oldtypeName(Index::IndexType type) { return ""; } -/// @brief validate an index id -bool Index::validateId(char const* key) { - char const* p = key; - - while (1) { - char const c = *p; - - if (c == '\0') { - return (p - key) > 0; - } - if (c >= '0' && c <= '9') { - ++p; - continue; - } - +/// @brief validate an index handle (collection name + / + index id) +bool Index::validateHandle(bool allowUnicode, arangodb::velocypack::StringRef handle) noexcept { + std::size_t pos = handle.find('/'); + if (pos == std::string::npos) { + // no prefix return false; } -} - -/// @brief validate an index name -bool Index::validateName(char const* key) { - return TRI_vocbase_t::IsAllowedName(false, arangodb::velocypack::StringRef(key, strlen(key))); -} - -namespace { -bool validatePrefix(char const* key, size_t* split) { - char const* p = key; - - // find divider - while (1) { - char c = *p; - - if (c == '\0') { + // check collection name part + if (!LogicalCollection::isAllowedName(/*allowSystem*/ true, allowUnicode, handle.substr(0, pos))) { + return false; + } + // check remainder (index id) + handle = handle.substr(pos + 1); + if (handle.empty()) { + return false; + } + for (std::size_t i = 0; i < handle.size(); ++i) { + if (handle[i] < '0' || handle[i] > '9') { return false; } - - if (c == '/') { - break; - } - - p++; } - - // store split position - *split = p - key; - - return TRI_vocbase_t::IsAllowedName(true, arangodb::velocypack::StringRef(key, *split)); -} -} // namespace - -/// @brief validate an index handle (collection name + / + index id) -bool Index::validateHandle(char const* key, size_t* split) { - bool ok = validatePrefix(key, split); - // validate index id - return ok && validateId(key + *split + 1); + return true; } /// @brief validate an index handle (collection name + / + index name) -bool Index::validateHandleName(char const* key, size_t* split) { - bool ok = validatePrefix(key, split); - // validate index id - return ok && validateName(key + *split + 1); +bool Index::validateHandleName(bool allowUnicode, arangodb::velocypack::StringRef name) noexcept { + std::size_t pos = name.find('/'); + if (pos == std::string::npos) { + // no prefix + return false; + } + // check collection name part + if (!LogicalCollection::isAllowedName(/*allowSystem*/ true, allowUnicode, name.substr(0, pos))) { + return false; + } + // check remainder (index name) + return Index::isAllowedName(allowUnicode, name.substr(pos + 1)); } /// @brief generate a new index id diff --git a/arangod/Indexes/Index.h b/arangod/Indexes/Index.h index b433d1bca282..2b40d5cfd891 100644 --- a/arangod/Indexes/Index.h +++ b/arangod/Indexes/Index.h @@ -143,6 +143,9 @@ class Index { }; public: + /// @brief maximal index name length + static constexpr size_t maxNameLength = 64; + /// @brief return the index id inline IndexId id() const { return _iid; } @@ -156,6 +159,11 @@ class Index { /// @brief set the name, if it is currently unset void name(std::string const&); + + /// @brief checks if an index name is valid + /// returns true if the name is allowed and false otherwise + static bool isAllowedName(bool allowUnicode, + arangodb::velocypack::StringRef const& name) noexcept; /// @brief return the index fields inline std::vector> const& fields() const { @@ -266,17 +274,11 @@ class Index { /// @brief return the name of an index type static char const* oldtypeName(IndexType); - /// @brief validate an index id - static bool validateId(char const*); - - /// @brief validate an index name - static bool validateName(char const*); - /// @brief validate an index handle (collection name + / + index id) - static bool validateHandle(char const*, size_t*); + static bool validateHandle(bool allowUnicode, arangodb::velocypack::StringRef handle) noexcept; /// @brief validate an index handle (by name) (collection name + / + index name) - static bool validateHandleName(char const*, size_t*); + static bool validateHandleName(bool allowUnicode, arangodb::velocypack::StringRef name) noexcept; /// @brief generate a new index id static IndexId generateId(); diff --git a/arangod/Indexes/IndexFactory.cpp b/arangod/Indexes/IndexFactory.cpp index f6757f57a003..fcfe9dd04fcd 100644 --- a/arangod/Indexes/IndexFactory.cpp +++ b/arangod/Indexes/IndexFactory.cpp @@ -32,6 +32,7 @@ #include "Cluster/ServerState.h" #include "Indexes/Index.h" #include "RestServer/BootstrapFeature.h" +#include "RestServer/DatabaseFeature.h" #include "VocBase/LogicalCollection.h" #include @@ -253,7 +254,8 @@ Result IndexFactory::enhanceIndexDefinition( // normalizze deefinition } } - if (!TRI_vocbase_t::IsAllowedName(false, velocypack::StringRef(name))) { + bool allowUnicode = _server.getFeature().allowUnicodeNames(); + if (!Index::isAllowedName(allowUnicode, velocypack::StringRef(name))) { return Result(TRI_ERROR_ARANGO_ILLEGAL_NAME); } diff --git a/arangod/Replication/Syncer.cpp b/arangod/Replication/Syncer.cpp index a07d278c6d7b..42f3f98b4ad4 100644 --- a/arangod/Replication/Syncer.cpp +++ b/arangod/Replication/Syncer.cpp @@ -27,6 +27,7 @@ #include "Basics/MutexLocker.h" #include "Basics/RocksDBUtils.h" #include "Basics/StaticStrings.h" +#include "Basics/StringUtils.h" #include "Basics/VelocyPackHelper.h" #include "GeneralServer/AuthenticationFeature.h" #include "Rest/HttpRequest.h" @@ -505,9 +506,9 @@ std::string Syncer::rewriteLocation(void* data, std::string const& location) { } TRI_ASSERT(!s->_state.databaseName.empty()); if (location[0] == '/') { - return "/_db/" + s->_state.databaseName + location; + return "/_db/" + basics::StringUtils::urlEncode(s->_state.databaseName) + location; } - return "/_db/" + s->_state.databaseName + "/" + location; + return "/_db/" + basics::StringUtils::urlEncode(s->_state.databaseName) + "/" + location; } void Syncer::setAborted(bool value) { _state.connection.setAborted(value); } diff --git a/arangod/RestHandler/RestAnalyzerHandler.cpp b/arangod/RestHandler/RestAnalyzerHandler.cpp index 7bfd0659d12c..e7e243dccdee 100644 --- a/arangod/RestHandler/RestAnalyzerHandler.cpp +++ b/arangod/RestHandler/RestAnalyzerHandler.cpp @@ -91,8 +91,9 @@ void RestAnalyzerHandler::createAnalyzer( // create return; } - if (!TRI_vocbase_t::IsAllowedName(false, velocypack::StringRef(splittedAnalyzerName.second.c_str(), - splittedAnalyzerName.second.size()))) { + if (!TRI_vocbase_t::isAllowedName(/*allowSystem*/ false, /*allowUnicode*/ false, + velocypack::StringRef(splittedAnalyzerName.second.c_str(), + splittedAnalyzerName.second.size()))) { generateError(arangodb::Result( TRI_ERROR_BAD_PARAMETER, "invalid characters in analyzer name '" + static_cast(splittedAnalyzerName.second) + "'" @@ -380,7 +381,8 @@ void RestAnalyzerHandler::removeAnalyzer( auto splittedAnalyzerName = IResearchAnalyzerFeature::splitAnalyzerName(requestedName); auto name = splittedAnalyzerName.second; - if (!TRI_vocbase_t::IsAllowedName(false, velocypack::StringRef(name.c_str(), name.size()))) { + if (!TRI_vocbase_t::isAllowedName(/*allowSystem*/ false, /*allowUnicode*/ false, + velocypack::StringRef(name.c_str(), name.size()))) { generateError(arangodb::Result( TRI_ERROR_BAD_PARAMETER, std::string("Invalid characters in analyzer name '").append(name) diff --git a/arangod/RestHandler/RestDatabaseHandler.cpp b/arangod/RestHandler/RestDatabaseHandler.cpp index ba9d2bea3ac4..cec554f3eef3 100644 --- a/arangod/RestHandler/RestDatabaseHandler.cpp +++ b/arangod/RestHandler/RestDatabaseHandler.cpp @@ -25,6 +25,7 @@ #include "ApplicationFeatures/ApplicationServer.h" #include "Basics/Result.h" +#include "Basics/Utf8Helper.h" #include "Cluster/ClusterFeature.h" #include "Cluster/ClusterInfo.h" #include "Cluster/ServerState.h" @@ -141,7 +142,9 @@ RestStatus RestDatabaseHandler::createDatabase() { events::CreateDatabase("", Result(TRI_ERROR_ARANGO_DATABASE_NAME_INVALID), _context); return RestStatus::DONE; } - std::string dbName = nameVal.copyString(); + VPackValueLength l; + char const* p = nameVal.getString(l); + std::string dbName = normalizeUtf8ToNFC(p, l); VPackSlice options = body.get("options"); VPackSlice users = body.get("users"); @@ -170,14 +173,14 @@ RestStatus RestDatabaseHandler::deleteDatabase() { events::DropDatabase("", Result(TRI_ERROR_ARANGO_USE_SYSTEM_DATABASE), _context); return RestStatus::DONE; } - std::vector const& suffixes = _request->suffixes(); + std::vector const& suffixes = _request->decodedSuffixes(); if (suffixes.size() != 1) { generateError(rest::ResponseCode::BAD, TRI_ERROR_HTTP_BAD_PARAMETER); events::DropDatabase("", Result(TRI_ERROR_HTTP_BAD_PARAMETER), _context); return RestStatus::DONE; } - std::string const& dbName = suffixes[0]; + std::string const dbName = normalizeUtf8ToNFC(suffixes[0].data(), suffixes[0].size()); Result res = methods::Databases::drop(_context, &_vocbase, dbName); if (res.ok()) { diff --git a/arangod/RestServer/DatabaseFeature.cpp b/arangod/RestServer/DatabaseFeature.cpp index 5cda9ce83c33..64bece5b331a 100644 --- a/arangod/RestServer/DatabaseFeature.cpp +++ b/arangod/RestServer/DatabaseFeature.cpp @@ -277,6 +277,7 @@ DatabaseFeature::DatabaseFeature(application_features::ApplicationServer& server _defaultWaitForSync(false), _forceSyncProperties(true), _ignoreDatafileErrors(false), + _allowUnicodeNames(true), _databasesLists(new DatabasesLists()), _isInitiallyEmpty(false), _checkVersion(false), @@ -318,6 +319,11 @@ void DatabaseFeature::collectOptions(std::shared_ptr options) { "load collections even if datafiles may contain errors", new BooleanParameter(&_ignoreDatafileErrors), arangodb::options::makeDefaultFlags(arangodb::options::Flags::Hidden)); + + options->addOption("--database.allow-unicode-names", + "allow Unicode characters in database and collection names", + new BooleanParameter(&_allowUnicodeNames)) + .setIntroducedIn(30900); // the following option was obsoleted in 3.9 options->addObsoleteOption( @@ -629,7 +635,8 @@ Result DatabaseFeature::createDatabase(CreateDatabaseInfo&& info, TRI_vocbase_t* } result = nullptr; - if (!TRI_vocbase_t::IsAllowedName(false, arangodb::velocypack::StringRef(name))) { + bool allowUnicode = allowUnicodeNames(); + if (!TRI_vocbase_t::isAllowedName(/*allowSystem*/ false, allowUnicode, arangodb::velocypack::StringRef(name))) { return {TRI_ERROR_ARANGO_DATABASE_NAME_INVALID}; } diff --git a/arangod/RestServer/DatabaseFeature.h b/arangod/RestServer/DatabaseFeature.h index f7cc85e78616..33f90ca8d78d 100644 --- a/arangod/RestServer/DatabaseFeature.h +++ b/arangod/RestServer/DatabaseFeature.h @@ -140,6 +140,7 @@ class DatabaseFeature : public application_features::ApplicationFeature { bool forceSyncProperties() const { return _forceSyncProperties; } void forceSyncProperties(bool value) { _forceSyncProperties = value; } bool waitForSync() const { return _defaultWaitForSync; } + bool allowUnicodeNames() const { return _allowUnicodeNames; } void enableCheckVersion() { _checkVersion = true; } void enableUpgrade() { _upgrade = true; } @@ -185,6 +186,8 @@ class DatabaseFeature : public application_features::ApplicationFeature { bool _forceSyncProperties; bool _ignoreDatafileErrors; + bool _allowUnicodeNames; + std::unique_ptr _databaseManager; std::atomic _databasesLists; diff --git a/arangod/V8Server/v8-analyzers.cpp b/arangod/V8Server/v8-analyzers.cpp index df12e218df3b..768733af75ba 100644 --- a/arangod/V8Server/v8-analyzers.cpp +++ b/arangod/V8Server/v8-analyzers.cpp @@ -288,8 +288,9 @@ void JS_Create(v8::FunctionCallbackInfo const& args) { return; } - if (!TRI_vocbase_t::IsAllowedName(false, arangodb::velocypack::StringRef(splittedAnalyzerName.second.c_str(), - splittedAnalyzerName.second.size()))) { + if (!TRI_vocbase_t::isAllowedName(/*allowSystem*/ false, /*allowUnicode*/ false, + arangodb::velocypack::StringRef(splittedAnalyzerName.second.c_str(), + splittedAnalyzerName.second.size()))) { TRI_V8_THROW_EXCEPTION_MESSAGE( TRI_ERROR_BAD_PARAMETER, std::string("invalid characters in analyzer name '").append(splittedAnalyzerName.second.c_str()).append("'") @@ -577,8 +578,9 @@ void JS_Remove(v8::FunctionCallbackInfo const& args) { return; } - if (!TRI_vocbase_t::IsAllowedName(false, arangodb::velocypack::StringRef(splittedAnalyzerName.second.c_str(), - splittedAnalyzerName.second.size()))) { + if (!TRI_vocbase_t::isAllowedName(/*allowSystem*/ false, /*allowUnicode*/ false, + arangodb::velocypack::StringRef(splittedAnalyzerName.second.c_str(), + splittedAnalyzerName.second.size()))) { TRI_V8_THROW_EXCEPTION_MESSAGE( TRI_ERROR_BAD_PARAMETER, std::string("Invalid characters in analyzer name '").append(splittedAnalyzerName.second) diff --git a/arangod/VocBase/LogicalCollection.cpp b/arangod/VocBase/LogicalCollection.cpp index 81c56686d0ae..130f7062c913 100644 --- a/arangod/VocBase/LogicalCollection.cpp +++ b/arangod/VocBase/LogicalCollection.cpp @@ -51,6 +51,7 @@ #include #include +#include #include using namespace arangodb; @@ -179,7 +180,8 @@ LogicalCollection::LogicalCollection(TRI_vocbase_t& vocbase, VPackSlice const& i TRI_ASSERT(info.isObject()); - if (!TRI_vocbase_t::IsAllowedName(info)) { + bool allowUnicode = vocbase.server().getFeature().allowUnicodeNames(); + if (!LogicalCollection::isAllowedName(system(), allowUnicode, arangodb::velocypack::StringRef(name()))) { THROW_ARANGO_EXCEPTION(TRI_ERROR_ARANGO_ILLEGAL_NAME); } @@ -280,6 +282,51 @@ LogicalCollection::LogicalCollection(TRI_vocbase_t& vocbase, VPackSlice const& i return category; } +/// @brief checks if a collection name is valid +/// returns true if the name is allowed and false otherwise +bool LogicalCollection::isAllowedName(bool allowSystem, bool allowUnicode, + arangodb::velocypack::StringRef const& name) noexcept { + size_t length = 0; + + for (char const* ptr = name.data(); length < name.size(); ++ptr, ++length) { + bool ok = true; + + if (allowUnicode) { + // forward slashes are disallowed inside collection names + ok &= (*ptr != '/'); + + if (length == 0) { + // a collection name must not start with a digit, because then it can be confused with + // collection ids + ok &= (*ptr < '0' || *ptr > '9'); + + // a collection name must not start with an underscore unless it is a system collection + ok &= (*ptr != '_' || allowSystem); + + // finally, a collection name must not start with a dot, because this is used for hidden + // agency entries + ok &= (*ptr != '.'); + } + } else { + if (length == 0) { + ok &= (*ptr >= 'a' && *ptr <= 'z') || (*ptr >= 'A' && *ptr <= 'Z') || (allowSystem && *ptr == '_'); + } else { + ok &= (*ptr >= 'a' && *ptr <= 'z') || (*ptr >= 'A' && *ptr <= 'Z') || + (*ptr == '_') || (*ptr == '-') || (*ptr >= '0' && *ptr <= '9'); + } + } + + if (!ok) { + return false; + } + } + + // collection names must be within the expected length limits + return (length > 0 && + length <= maxNameLength && + velocypack::Utf8Helper::isValidUtf8(reinterpret_cast(name.data()), name.size())); +} + Result LogicalCollection::updateSchema(VPackSlice schema) { using namespace std::literals::string_literals; if (schema.isNone()) { diff --git a/arangod/VocBase/LogicalCollection.h b/arangod/VocBase/LogicalCollection.h index 0c276d6f56d0..0d84f1013d16 100644 --- a/arangod/VocBase/LogicalCollection.h +++ b/arangod/VocBase/LogicalCollection.h @@ -83,7 +83,7 @@ class LogicalCollection : public LogicalDataSource { LogicalCollection& operator=(LogicalCollection const&) = delete; ~LogicalCollection() override; - /// @brief maximal collection name length + /// @brief maximal collection name length, in bytes static constexpr size_t maxNameLength = 256; enum class Version { v30 = 5, v31 = 6, v33 = 7, v34 = 8, v37 = 9 }; @@ -105,9 +105,12 @@ class LogicalCollection : public LogicalDataSource { RemoteSmartEdge = 4, }; - ////////////////////////////////////////////////////////////////////////////// + /// @brief checks if a collection name is valid + /// returns true if the name is allowed and false otherwise + static bool isAllowedName(bool allowSystem, bool allowUnicode, + arangodb::velocypack::StringRef const& name) noexcept; + /// @brief the category representing a logical collection - ////////////////////////////////////////////////////////////////////////////// static Category const& category() noexcept; /// @brief hard-coded minimum version number for collections diff --git a/arangod/VocBase/LogicalView.cpp b/arangod/VocBase/LogicalView.cpp index db193d49f776..8de2a828d70e 100644 --- a/arangod/VocBase/LogicalView.cpp +++ b/arangod/VocBase/LogicalView.cpp @@ -35,11 +35,13 @@ #include "StorageEngine/StorageEngine.h" #include "Utils/Events.h" #include "Utils/ExecContext.h" +#include "VocBase/LogicalCollection.h" #include "VocBase/ticks.h" #include "VocBase/vocbase.h" #include #include +#include #include namespace arangodb { @@ -63,7 +65,9 @@ LogicalView::LogicalView(TRI_vocbase_t& vocbase, VPackSlice const& definition) "got an invalid view definition while constructing LogicalView"); } - if (!TRI_vocbase_t::IsAllowedName(definition)) { + bool allowUnicode = vocbase.server().getFeature().allowUnicodeNames(); + + if (!LogicalView::isAllowedName(/*allowSystem*/ false, allowUnicode, arangodb::velocypack::StringRef(name()))) { THROW_ARANGO_EXCEPTION(TRI_ERROR_ARANGO_ILLEGAL_NAME); } @@ -77,6 +81,52 @@ LogicalView::LogicalView(TRI_vocbase_t& vocbase, VPackSlice const& definition) TRI_UpdateTickServer(id().id()); } +/// @brief checks if a view name is valid +/// returns true if the name is allowed and false otherwise +bool LogicalView::isAllowedName(bool allowSystem, bool allowUnicode, + arangodb::velocypack::StringRef const& name) noexcept { + size_t length = 0; + + for (char const* ptr = name.data(); length < name.size(); ++ptr, ++length) { + bool ok = true; + + if (allowUnicode) { + // forward slashes are disallowed inside view names + ok &= (*ptr != '/'); + + if (length == 0) { + // a view name must not start with a digit, because then it can be confused with + // view ids + ok &= (*ptr < '0' || *ptr > '9'); + + // a view name must not start with an underscore unless + // (which is not created via any checked API) + ok &= (*ptr != '_' || allowSystem); + + // finally, a view name must not start with a dot, because this is used for hidden + // agency entries + ok &= (*ptr != '.'); + } + } else { + if (length == 0) { + ok &= (*ptr >= 'a' && *ptr <= 'z') || (*ptr >= 'A' && *ptr <= 'Z') || (allowSystem && *ptr == '_'); + } else { + ok &= (*ptr >= 'a' && *ptr <= 'z') || (*ptr >= 'A' && *ptr <= 'Z') || + (*ptr == '_') || (*ptr == '-') || (*ptr >= '0' && *ptr <= '9'); + } + } + + if (!ok) { + return false; + } + } + + // collection names must be within the expected length limits + return (length > 0 && + ((!allowUnicode && length <= maxNameLength) || (allowUnicode && length <= maxNameLengthUnicode)) && + velocypack::Utf8Helper::isValidUtf8(reinterpret_cast(name.data()), name.size())); +} + Result LogicalView::appendVelocyPack(velocypack::Builder& builder, Serialization context) const { if (!builder.isOpenObject()) { diff --git a/arangod/VocBase/LogicalView.h b/arangod/VocBase/LogicalView.h index d664af877f0b..03ef4b66703f 100644 --- a/arangod/VocBase/LogicalView.h +++ b/arangod/VocBase/LogicalView.h @@ -39,8 +39,9 @@ namespace arangodb { namespace velocypack { -class Slice; class Builder; +class Slice; +class StringRef; } // namespace velocypack } // arangodb @@ -101,6 +102,11 @@ class LogicalView : public LogicalDataSource { return static_cast(view); #endif } + + /// @brief checks if a view name is allowed + /// returns true if the name is allowed and false otherwise + static bool isAllowedName(bool allowSystem, bool allowUnicode, + arangodb::velocypack::StringRef const& name) noexcept; ////////////////////////////////////////////////////////////////////////////// /// @brief queries properties of an existing view @@ -187,6 +193,13 @@ class LogicalView : public LogicalDataSource { /// including persistance of properties ////////////////////////////////////////////////////////////////////////////// virtual Result renameImpl(std::string const& oldName) = 0; + + /// @brief maximal view name length, in bytes (old convention, used when + /// `--database.allow-unicode-names=false`) + static constexpr size_t maxNameLength = 64; + /// @brief maximal view name length, in bytes (new convention, used when + /// `--database.allow-unicode-names=true`) + static constexpr size_t maxNameLengthUnicode = 256; private: // FIXME seems to be ugly diff --git a/arangod/VocBase/Methods/Collections.cpp b/arangod/VocBase/Methods/Collections.cpp index 7b63a990dc21..2b5106e7e339 100644 --- a/arangod/VocBase/Methods/Collections.cpp +++ b/arangod/VocBase/Methods/Collections.cpp @@ -756,7 +756,8 @@ Result Collections::rename(LogicalCollection& collection, "a system collection name"); } - if (!TRI_vocbase_t::IsAllowedName(isSystem, arangodb::velocypack::StringRef(newName))) { + bool allowUnicode = collection.vocbase().server().getFeature().allowUnicodeNames(); + if (!LogicalCollection::isAllowedName(isSystem, allowUnicode, arangodb::velocypack::StringRef(newName))) { return TRI_ERROR_ARANGO_ILLEGAL_NAME; } } diff --git a/arangod/VocBase/Methods/Databases.cpp b/arangod/VocBase/Methods/Databases.cpp index b8fa8ec4aa59..261e91a5f9e7 100644 --- a/arangod/VocBase/Methods/Databases.cpp +++ b/arangod/VocBase/Methods/Databases.cpp @@ -174,7 +174,8 @@ arangodb::Result Databases::grantCurrentUser(CreateDatabaseInfo const& info, int Result Databases::createCoordinator(CreateDatabaseInfo const& info) { TRI_ASSERT(ServerState::instance()->isCoordinator()); - if (!TRI_vocbase_t::IsAllowedName(/*_isSystemDB*/ false, arangodb::velocypack::StringRef(info.getName()))) { + bool allowUnicode = info.server().getFeature().allowUnicodeNames(); + if (!TRI_vocbase_t::isAllowedName(/*allowSystem*/ false, allowUnicode, arangodb::velocypack::StringRef(info.getName()))) { return Result(TRI_ERROR_ARANGO_DATABASE_NAME_INVALID); } diff --git a/arangod/VocBase/Methods/Indexes.cpp b/arangod/VocBase/Methods/Indexes.cpp index d1b3e4abe0b2..3937f9a69e19 100644 --- a/arangod/VocBase/Methods/Indexes.cpp +++ b/arangod/VocBase/Methods/Indexes.cpp @@ -510,7 +510,7 @@ arangodb::Result Indexes::createIndex(LogicalCollection* coll, Index::IndexType /// @brief checks if argument is an index identifier //////////////////////////////////////////////////////////////////////////////// -static bool ExtractIndexHandle(VPackSlice const& arg, +static bool ExtractIndexHandle(VPackSlice const& arg, bool allowUnicode, std::string& collectionName, IndexId& iid) { TRI_ASSERT(collectionName.empty()); TRI_ASSERT(iid.empty()); @@ -525,26 +525,30 @@ static bool ExtractIndexHandle(VPackSlice const& arg, return false; } - std::string str = arg.copyString(); - size_t split; - if (arangodb::Index::validateHandle(str.data(), &split)) { - collectionName = std::string(str.data(), split); - iid = IndexId{StringUtils::uint64(str.data() + split + 1, str.length() - split - 1)}; + arangodb::velocypack::StringRef handle = arg.stringRef(); + if (arangodb::Index::validateHandle(allowUnicode, handle)) { + std::size_t split = handle.find('/'); + TRI_ASSERT(split != std::string::npos); + collectionName = std::string(handle.data(), split); + iid = IndexId{StringUtils::uint64(handle.data() + split + 1, handle.size() - split - 1)}; return true; } - if (arangodb::Index::validateId(str.data())) { - iid = IndexId{StringUtils::uint64(str)}; - return true; + for (std::size_t i = 0; i < handle.size(); ++i) { + if (handle[i] < '0' || handle[i] > '9') { + return false; + } } - return false; + iid = IndexId{StringUtils::uint64(handle.data(), handle.size())}; + return true; } //////////////////////////////////////////////////////////////////////////////// /// @brief checks if argument is an index name //////////////////////////////////////////////////////////////////////////////// -static bool ExtractIndexName(VPackSlice const& arg, std::string& collectionName, +static bool ExtractIndexName(VPackSlice const& arg, bool allowUnicode, + std::string& collectionName, std::string& name) { TRI_ASSERT(collectionName.empty()); TRI_ASSERT(name.empty()); @@ -552,17 +556,18 @@ static bool ExtractIndexName(VPackSlice const& arg, std::string& collectionName, if (!arg.isString()) { return false; } - - std::string str = arg.copyString(); - size_t split; - if (arangodb::Index::validateHandleName(str.data(), &split)) { - collectionName = std::string(str.data(), split); - name = std::string(str.data() + split + 1, str.length() - split - 1); + + arangodb::velocypack::StringRef handle = arg.stringRef(); + if (arangodb::Index::validateHandleName(allowUnicode, handle)) { + std::size_t split = handle.find('/'); + TRI_ASSERT(split != std::string::npos); + collectionName = std::string(handle.data(), split); + name = std::string(handle.data() + split + 1, handle.size() - split - 1); return true; } - if (arangodb::Index::validateName(str.data())) { - name = str; + if (arangodb::Index::isAllowedName(allowUnicode, handle)) { + name = std::string(handle.data(), handle.size()); return true; } return false; @@ -581,10 +586,12 @@ Result Indexes::extractHandle(arangodb::LogicalCollection const* collection, // assume we are already loaded TRI_ASSERT(collection != nullptr); + bool allowUnicode = collection->vocbase().server().getFeature().allowUnicodeNames(); + // extract the index identifier from a string if (val.isString() || val.isNumber()) { - if (!ExtractIndexHandle(val, collectionName, iid) && - !ExtractIndexName(val, collectionName, name)) { + if (!ExtractIndexHandle(val, allowUnicode, collectionName, iid) && + !ExtractIndexName(val, allowUnicode, collectionName, name)) { return Result(TRI_ERROR_ARANGO_INDEX_HANDLE_BAD); } } @@ -592,9 +599,9 @@ Result Indexes::extractHandle(arangodb::LogicalCollection const* collection, // extract the index identifier from an object else if (val.isObject()) { VPackSlice iidVal = val.get(StaticStrings::IndexId); - if (!ExtractIndexHandle(iidVal, collectionName, iid)) { + if (!ExtractIndexHandle(iidVal, allowUnicode, collectionName, iid)) { VPackSlice nameVal = val.get(StaticStrings::IndexName); - if (!ExtractIndexName(nameVal, collectionName, name)) { + if (!ExtractIndexName(nameVal, allowUnicode, collectionName, name)) { return Result(TRI_ERROR_ARANGO_INDEX_HANDLE_BAD); } } diff --git a/arangod/VocBase/VocbaseInfo.cpp b/arangod/VocBase/VocbaseInfo.cpp index 5d3434115976..c19f7cd2d18b 100644 --- a/arangod/VocBase/VocbaseInfo.cpp +++ b/arangod/VocBase/VocbaseInfo.cpp @@ -30,6 +30,7 @@ #include "Cluster/ClusterInfo.h" #include "Cluster/ServerState.h" #include "Logger/LogMacros.h" +#include "RestServer/DatabaseFeature.h" #include "Utils/Events.h" namespace arangodb { @@ -290,14 +291,10 @@ Result CreateDatabaseInfo::checkOptions() { _validId = false; } - // we cannot use IsAllowedName for database name length validation alone, because - // IsAllowedName allows up to 256 characters. Database names are just up to 64 - // chars long, as their names are also used as filesystem directories (for Foxx apps) bool isSystem = _name == StaticStrings::SystemDatabase; + bool allowUnicode = _server.getFeature().allowUnicodeNames(); - if (_name.empty() || - !TRI_vocbase_t::IsAllowedName(isSystem, arangodb::velocypack::StringRef(_name)) || - _name.size() > 64) { + if (!TRI_vocbase_t::isAllowedName(isSystem, allowUnicode, arangodb::velocypack::StringRef(_name))) { return Result(TRI_ERROR_ARANGO_DATABASE_NAME_INVALID); } diff --git a/arangod/VocBase/vocbase.cpp b/arangod/VocBase/vocbase.cpp index 0825d3e4c31e..b47adcafd27c 100644 --- a/arangod/VocBase/vocbase.cpp +++ b/arangod/VocBase/vocbase.cpp @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -392,7 +393,7 @@ bool TRI_vocbase_t::unregisterView(arangodb::LogicalView const& view) { /// @brief creates a new collection, worker function std::shared_ptr TRI_vocbase_t::createCollectionWorker(VPackSlice parameters) { std::string name = - arangodb::basics::VelocyPackHelper::getStringValue(parameters, "name", ""); + arangodb::basics::VelocyPackHelper::getStringValue(parameters, StaticStrings::DataSourceName, ""); TRI_ASSERT(!name.empty()); std::string const& dbName = _info.getName(); @@ -884,14 +885,18 @@ std::shared_ptr TRI_vocbase_t::createCollection( arangodb::velocypack::Slice parameters) { // check that the name does not contain any strange characters - if (!IsAllowedName(parameters)) { - std::string name; - std::string const& dbName = _info.getName(); - if (parameters.isObject()) { - name = VelocyPackHelper::getStringValue(parameters, - StaticStrings::DataSourceName, ""); - } + std::string name; + bool valid = parameters.isObject(); + if (valid) { + name = VelocyPackHelper::getStringValue(parameters, + StaticStrings::DataSourceName, ""); + bool isSystem = VelocyPackHelper::getBooleanValue(parameters, StaticStrings::DataSourceSystem, false); + bool allowUnicode = server().getFeature().allowUnicodeNames(); + valid &= LogicalCollection::isAllowedName(isSystem, allowUnicode, arangodb::velocypack::StringRef(name)); + } + if (!valid) { + std::string const& dbName = _info.getName(); events::CreateCollection(dbName, name, TRI_ERROR_ARANGO_ILLEGAL_NAME); THROW_ARANGO_EXCEPTION(TRI_ERROR_ARANGO_ILLEGAL_NAME); } @@ -1021,7 +1026,8 @@ arangodb::Result TRI_vocbase_t::renameView(DataSourceId cid, std::string const& return TRI_ERROR_NO_ERROR; } - if (!IsAllowedName(IsSystemName(newName), arangodb::velocypack::StringRef(newName))) { + bool allowUnicode = databaseFeature.allowUnicodeNames(); + if (!LogicalView::isAllowedName(/*allowSystem*/ false, allowUnicode, arangodb::velocypack::StringRef(newName))) { return TRI_set_errno(TRI_ERROR_ARANGO_ILLEGAL_NAME); } @@ -1231,15 +1237,18 @@ std::shared_ptr TRI_vocbase_t::createView(arangodb::veloc TRI_ASSERT(!ServerState::instance()->isCoordinator()); auto& engine = server().getFeature().engine(); std::string const& dbName = _info.getName(); + + std::string name; + bool valid = parameters.isObject(); + if (valid) { + name = VelocyPackHelper::getStringValue(parameters, + StaticStrings::DataSourceName, ""); + bool allowUnicode = server().getFeature().allowUnicodeNames(); + valid &= LogicalView::isAllowedName(/*allowSystem*/ false, allowUnicode, arangodb::velocypack::StringRef(name)); + } - // check that the name does not contain any strange characters - if (!IsAllowedName(parameters)) { - std::string n; - if (parameters.isObject()) { - n = VelocyPackHelper::getStringValue(parameters, - StaticStrings::DataSourceName, ""); - } - events::CreateView(dbName, n, TRI_ERROR_ARANGO_ILLEGAL_NAME); + if (!valid) { + events::CreateView(dbName, name, TRI_ERROR_ARANGO_ILLEGAL_NAME); THROW_ARANGO_EXCEPTION(TRI_ERROR_ARANGO_ILLEGAL_NAME); } @@ -1247,12 +1256,7 @@ std::shared_ptr TRI_vocbase_t::createView(arangodb::veloc auto res = LogicalView::instantiate(view, *this, parameters); if (!res.ok() || !view) { - std::string n; - if (parameters.isObject()) { - n = VelocyPackHelper::getStringValue(parameters, - StaticStrings::DataSourceName, ""); - } - events::CreateView(dbName, n, res.errorNumber()); + events::CreateView(dbName, name, res.errorNumber()); THROW_ARANGO_EXCEPTION_MESSAGE( res.errorNumber(), res.errorMessage().empty() @@ -1442,30 +1446,39 @@ std::uint32_t TRI_vocbase_t::writeConcern() const { return _info.writeConcern(); } -bool TRI_vocbase_t::IsAllowedName(arangodb::velocypack::Slice slice) noexcept { - return !slice.isObject() - ? false - : IsAllowedName(arangodb::basics::VelocyPackHelper::getBooleanValue( - slice, StaticStrings::DataSourceSystem, false), - arangodb::basics::VelocyPackHelper::getStringRef( - slice, StaticStrings::DataSourceName, VPackStringRef())); -} - -/// @brief checks if a database name is allowed +/// @brief checks if a database name is valid /// returns true if the name is allowed and false otherwise -bool TRI_vocbase_t::IsAllowedName(bool allowSystem, +bool TRI_vocbase_t::isAllowedName(bool allowSystem, bool allowUnicode, arangodb::velocypack::StringRef const& name) noexcept { size_t length = 0; - // check allow characters: must start with letter or underscore if system is - // allowed for (char const* ptr = name.data(); length < name.size(); ++ptr, ++length) { - bool ok; - if (length == 0) { - ok = ('a' <= *ptr && *ptr <= 'z') || ('A' <= *ptr && *ptr <= 'Z') || (allowSystem && *ptr == '_'); + bool ok = true; + + if (allowUnicode) { + // forward slashes are disallowed inside database names + ok &= (*ptr != '/'); + + if (length == 0) { + // a database name must not start with a digit, because then it can be confused with + // database ids + ok &= (*ptr < '0' || *ptr > '9'); + + // a database name must not start with an underscore unless it is the system database + // (which is not created via any checked API) + ok &= (*ptr != '_' || allowSystem); + + // finally, a database name must not start with a dot, because this is used for hidden + // agency entries + ok &= (*ptr != '.'); + } } else { - ok = ('a' <= *ptr && *ptr <= 'z') || ('A' <= *ptr && *ptr <= 'Z') || - (*ptr == '_') || (*ptr == '-') || ('0' <= *ptr && *ptr <= '9'); + if (length == 0) { + ok &= (*ptr >= 'a' && *ptr <= 'z') || (*ptr >= 'A' && *ptr <= 'Z') || (allowSystem && *ptr == '_'); + } else { + ok &= (*ptr >= 'a' && *ptr <= 'z') || (*ptr >= 'A' && *ptr <= 'Z') || + (*ptr == '_') || (*ptr == '-') || (*ptr >= '0' && *ptr <= '9'); + } } if (!ok) { @@ -1473,7 +1486,10 @@ bool TRI_vocbase_t::IsAllowedName(bool allowSystem, } } - return (length > 0 && length <= LogicalCollection::maxNameLength); + // collection names must be within the expected length limits + return (length > 0 && + ((!allowUnicode && length <= maxNameLength) || (allowUnicode && length <= maxNameLengthUnicode)) && + velocypack::Utf8Helper::isValidUtf8(reinterpret_cast(name.data()), name.size())); } /// @brief determine whether a collection name is a system collection name diff --git a/arangod/VocBase/vocbase.h b/arangod/VocBase/vocbase.h index db1c6ce6ec4c..3b00d1e87dec 100644 --- a/arangod/VocBase/vocbase.h +++ b/arangod/VocBase/vocbase.h @@ -111,6 +111,13 @@ struct TRI_vocbase_t { TRI_vocbase_t(TRI_vocbase_type_e type, arangodb::CreateDatabaseInfo&&); TEST_VIRTUAL ~TRI_vocbase_t(); + + /// @brief maximal database name length, in bytes (old convention, used when + /// `--database.allow-unicode-names=false`) + static constexpr size_t maxNameLength = 64; + /// @brief maximal database name length, in bytes (new convention, used when + /// `--database.allow-unicode-names=true`) + static constexpr size_t maxNameLengthUnicode = 256; private: // explicitly document implicit behavior (due to presence of locks) @@ -168,8 +175,7 @@ struct TRI_vocbase_t { public: /// @brief checks if a database name is allowed /// returns true if the name is allowed and false otherwise - static bool IsAllowedName(arangodb::velocypack::Slice slice) noexcept; - static bool IsAllowedName(bool allowSystem, + static bool isAllowedName(bool allowSystem, bool allowUnicode, arangodb::velocypack::StringRef const& name) noexcept; /// @brief determine whether a data-source name is a system data-source name diff --git a/arangosh/Utils/ClientManager.cpp b/arangosh/Utils/ClientManager.cpp index 413d5cd3471f..2cc7c4ed3a40 100644 --- a/arangosh/Utils/ClientManager.cpp +++ b/arangosh/Utils/ClientManager.cpp @@ -25,6 +25,7 @@ #include "ClientManager.h" #include "ApplicationFeatures/ApplicationServer.h" +#include "Basics/StringUtils.h" #include "Basics/VelocyPackHelper.h" #include "Basics/application-exit.h" #include "Logger/Logger.h" @@ -168,9 +169,9 @@ std::string ClientManager::rewriteLocation(void* data, std::string const& locati // prefix with `/_db/${dbname}/` if (location[0] == '/') { // already have leading "/", leave it off - return "/_db/" + dbname + location; + return "/_db/" + basics::StringUtils::urlEncode(dbname) + location; } - return "/_db/" + dbname + "/" + location; + return "/_db/" + basics::StringUtils::urlEncode(dbname) + "/" + location; } std::pair ClientManager::getArangoIsCluster(httpclient::SimpleHttpClient& client) { diff --git a/arangosh/V8Client/ArangoClientHelper.cpp b/arangosh/V8Client/ArangoClientHelper.cpp index e96e805042a4..6e09b9434616 100644 --- a/arangosh/V8Client/ArangoClientHelper.cpp +++ b/arangosh/V8Client/ArangoClientHelper.cpp @@ -26,6 +26,7 @@ #include #include +#include "Basics/StringUtils.h" #include "Basics/VelocyPackHelper.h" #include "Shell/ClientFeature.h" #include "SimpleHttpClient/GeneralClientConnection.h" @@ -48,9 +49,9 @@ std::string ArangoClientHelper::rewriteLocation(void* data, std::string const& l std::string const& dbname = client->databaseName(); if (location[0] == '/') { - return "/_db/" + dbname + location; + return "/_db/" + basics::StringUtils::urlEncode(dbname) + location; } - return "/_db/" + dbname + "/" + location; + return "/_db/" + basics::StringUtils::urlEncode(dbname) + "/" + location; } // extract an error message from a response diff --git a/js/apps/system/_admin/aardvark/APP/frontend/js/templates/databaseView.ejs b/js/apps/system/_admin/aardvark/APP/frontend/js/templates/databaseView.ejs index a3006c80a5f3..8dbc3d2904a8 100644 --- a/js/apps/system/_admin/aardvark/APP/frontend/js/templates/databaseView.ejs +++ b/js/apps/system/_admin/aardvark/APP/frontend/js/templates/databaseView.ejs @@ -55,13 +55,13 @@
<% if (!readOnly) { %> - + <% } %>
-
<%=name %>
+
<%=_.escape(name)%>
<%};%> diff --git a/lib/Basics/Utf8Helper.cpp b/lib/Basics/Utf8Helper.cpp index 4c5b3772da3c..633906bc5ef9 100755 --- a/lib/Basics/Utf8Helper.cpp +++ b/lib/Basics/Utf8Helper.cpp @@ -46,6 +46,7 @@ #include "Utf8Helper.h" +#include "Basics/Exceptions.h" #include "Basics/StaticStrings.h" #include "Basics/debugging.h" #include "Basics/memory.h" @@ -738,6 +739,21 @@ char* TRI_normalize_utf8_to_NFC(char const* utf8, size_t inLength, size_t* outLe return utf8Dest; } +std::string normalizeUtf8ToNFC(char const* value, size_t length) { + size_t outLength = 0; + char* normalized = TRI_normalize_utf8_to_NFC(value, length, &outLength); + if (normalized == nullptr) { + THROW_ARANGO_EXCEPTION(TRI_ERROR_OUT_OF_MEMORY); + } + std::string result(normalized, outLength); + TRI_Free(normalized); + return result; +} + +std::string normalizeUtf8ToNFC(std::string const& value) { + return normalizeUtf8ToNFC(value.data(), value.size()); +} + //////////////////////////////////////////////////////////////////////////////// /// @brief normalize an utf8 string (NFC) //////////////////////////////////////////////////////////////////////////////// diff --git a/lib/Basics/Utf8Helper.h b/lib/Basics/Utf8Helper.h index 68105518b8b7..dd748ce9e5b0 100644 --- a/lib/Basics/Utf8Helper.h +++ b/lib/Basics/Utf8Helper.h @@ -212,6 +212,9 @@ char* TRI_UCharToUtf8(UChar const* uchar, size_t inLength, size_t* outLength); char* TRI_normalize_utf8_to_NFC(char const* utf8, size_t inLength, size_t* outLength); +std::string normalizeUtf8ToNFC(char const* value, size_t length); +std::string normalizeUtf8ToNFC(std::string const& name); + //////////////////////////////////////////////////////////////////////////////// /// @brief normalize an utf16 string (NFC) and export it to utf8 //////////////////////////////////////////////////////////////////////////////// diff --git a/lib/Rest/HttpRequest.cpp b/lib/Rest/HttpRequest.cpp index 32b950625d8d..fe2b7b596e74 100644 --- a/lib/Rest/HttpRequest.cpp +++ b/lib/Rest/HttpRequest.cpp @@ -24,6 +24,7 @@ #include "HttpRequest.h" #include "Basics/NumberUtils.h" +#include "Basics/Utf8Helper.h" #include #include @@ -42,6 +43,30 @@ using namespace arangodb; using namespace arangodb::basics; +namespace { +std::string url_decode(const char* begin, const char* end) { + std::string out; + out.reserve(static_cast(end - begin)); + for (const char* i = begin; i != end; ++i) { + std::string::value_type c = (*i); + if (c == '%') { + if (i + 2 < end) { + int h = StringUtils::hex2int(i[1], 0) << 4; + h += StringUtils::hex2int(i[2], 0); + out.push_back(static_cast(h & 0xFF)); + i += 2; + } + } else if (c == '+') { + out.push_back(' '); + } else { + out.push_back(c); + } + } + + return out; +} +} // namespace + HttpRequest::HttpRequest(ConnectionInfo const& connectionInfo, uint64_t mid, bool allowMethodOverride) : GeneralRequest(connectionInfo, mid), @@ -201,7 +226,7 @@ void HttpRequest::parseHeader(char* start, size_t length) { ++q; } - _databaseName = std::string(pathBegin, q - pathBegin); + _databaseName = normalizeUtf8ToNFC(::url_decode(pathBegin, q)); pathBegin = q; } @@ -373,28 +398,6 @@ void HttpRequest::parseHeader(char* start, size_t length) { } } -namespace { - std::string url_decode(const char* begin, const char* end) { - std::string out; - out.reserve(static_cast(end - begin)); - for (const char* i = begin; i != end; ++i) { - std::string::value_type c = (*i); - if (c == '%') { - if (i + 2 < end) { - int h = StringUtils::hex2int(i[1], 0) << 4; - h += StringUtils::hex2int(i[2], 0); - out.push_back(static_cast(h & 0x7F)); - i += 2; - } - } else if (c == '+') { - out.push_back(' '); - } else { - out.push_back(c); - } - } - return out; - } -} void HttpRequest::parseUrl(const char* path, size_t length) { std::string tmp; @@ -430,7 +433,7 @@ void HttpRequest::parseUrl(const char* path, size_t length) { } TRI_ASSERT(q >= start); - _databaseName.assign(start, q - start); + _databaseName = normalizeUtf8ToNFC(::url_decode(start, q)); _fullUrl.assign(q, end - q); start = q; diff --git a/tests/js/common/shell/shell-object-names.js b/tests/js/common/shell/shell-object-names.js new file mode 100644 index 000000000000..9b183f69f333 --- /dev/null +++ b/tests/js/common/shell/shell-object-names.js @@ -0,0 +1,238 @@ +/*jshint globalstrict:false, strict:false */ +/*global assertEqual, assertNotEqual, assertTrue, fail, NORMALIZE_STRING */ + +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2014-2018 ArangoDB GmbH, Cologne, Germany +/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Jan Steemann +//////////////////////////////////////////////////////////////////////////////// + +let jsunity = require("jsunity"); +let arangodb = require("@arangodb"); +let internal = require("internal"); +let db = arangodb.db; + +function DatabaseNamesSuite() { + return { + + testInvalidPunctuationDatabaseNames : function () { + const names = [ + "", + "_", + ".", + "/", + "//", + "a/", + "/a", + "a/1", + "foo/bar", + "foo/bar/baz", + ".a.a.a.a.", + "..", + "0", + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "00", + "10000", + "001000", + "999999999", + "9999/3933", + "9aaaa", + ]; + + names.forEach((name) => { + db._useDatabase("_system"); + try { + db._createDatabase(name); + fail(); + } catch (err) { + assertEqual(internal.errors.ERROR_ARANGO_DATABASE_NAME_INVALID.code, err.errorNum, name); + } + }); + }, + + testValidPunctuationDatabaseNames : function () { + const names = [ + "a b c", + "A B C", + "a.a.a.a.", + "a00000", + "a quick brown fox", + "(foo|bar|baz)", + "-.-.-.,", + " ", + "-", + ",", + ":", + ";", + "!", + "\"", + "\\", + "'", + "$", + "#", + "%", + "&", + "(", + ")", + "=", + "[", + "]", + "{", + "}", + "?", + "´", + "`", + "+", + "-", + "*", + "#", + "<", + ">", + "|", + ]; + + names.forEach((name) => { + db._useDatabase("_system"); + let d = db._createDatabase(name); + assertTrue(d); + + assertNotEqual(-1, db._databases().indexOf(name), name); + + db._useDatabase(name); + assertEqual(db._name(), name); + + db._useDatabase("_system"); + db._dropDatabase(name); + assertEqual(-1, db._databases().indexOf(name), name); + }); + }, + + + testCheckUnicodeDatabaseNames : function () { + // some of these test strings are taken from https://www.w3.org/2001/06/utf-8-test/UTF-8-demo.html + const names = [ + "mötör", + "TRÖÖÖÖÖÖÖÖÖTKÄKÄR", + "∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i)", + "∀x∈ℝ: ⌈x⌉ = −⌊−x⌋", + "α ∧ ¬β = ¬(¬α ∨ β)", + "two H₂ + O₂ ⇌ 2H₂O", + "R = 4.7 kΩ, ⌀ 200 mm", + "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn", + "Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ]", + "Οὐχὶ ταὐτὰ παρίσταταί μοι", + "γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι", + "გთხოვთ ახლავე გაიაროთ რეგისტრაცია", + "Unicode-ის მეათე საერთაშორისო", + "Зарегистрируйтесь сейчас на", + "Десятую Международную Конференцию по", + "ሰማይ አይታረስ ንጉሥ አይከሰስ።", + "ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ", + "ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ", + "⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌", + "£", + "ß", + "ó", + "ę", + "Я", + "λ", + "💩", + "🍺", + "🌧", + "⛈ ", + "🌩", + "⚡", + "🔥", + "💥", + "🌨", + "😀 :grinning:", + "😬 :grimacing:", + "😁 :grin:", + "😂 :joy:", + "😃 :smiley:", + "😄 :smile:", + "😅 :sweat_smile:", + "😆 :laughing:", + "😇 :innocent:", + "😉 :wink:", + "😊 :blush:", + "🙂 :slight_smile:", + "🙃 :upside_down:", + "😋 :yum:", + "😌 :relieved:", + "😍 :heart_eyes:", + "😘 :kissing_heart:", + "😗 :kissing:", + "😙 :kissing_smiling_eyes:", + "😚 :kissing_closed_eyes:", + "😜 :stuck_out_tongue_winking_eye:", + "😝 :stuck_out_tongue_closed_eyes:", + "😛 :stuck_out_tongue:", + "🤑 :money_mouth:", + "🤓 :nerd:", + "😎 :sunglasses:", + "🤗 :hugging:", + "😏 :smirk:", + "😶 :no_mouth:", + "😐 :neutral_face:", + "😑 :expressionless:", + "😒 :unamused:", + "🙄 :rolling_eyes:", + "🤔 :thinking:", + "😳 :flushed:", + "😞 :disappointed:", + "😟 :worried:", + "😠 :angry:", + "😡 :rage:", + "😔 :pensive:", + "😕 :confused:", + ]; + + names.forEach((name) => { + db._useDatabase("_system"); + let d = db._createDatabase(name); + assertTrue(d); + + assertNotEqual(-1, db._databases().indexOf(NORMALIZE_STRING(name)), NORMALIZE_STRING(name)); + + db._useDatabase(name); + assertEqual(NORMALIZE_STRING(db._name()), NORMALIZE_STRING(name)); + + db._useDatabase("_system"); + db._dropDatabase(name); + assertEqual(-1, db._databases().indexOf(name), NORMALIZE_STRING(name)); + }); + }, + + }; +} + +jsunity.run(DatabaseNamesSuite); + +return jsunity.done(); From a6b0b20e6e87d832a2e99672f3a40f7cd61baa6d Mon Sep 17 00:00:00 2001 From: jsteemann Date: Tue, 3 Aug 2021 18:30:32 +0200 Subject: [PATCH 002/101] fix XSS --- .../APP/frontend/js/templates/dbSelectionView.ejs | 2 +- .../APP/frontend/js/templates/subNavigationView.ejs | 2 +- .../aardvark/APP/frontend/js/views/databaseView.js | 8 -------- .../aardvark/APP/frontend/js/views/loginView.js | 11 +++++------ 4 files changed, 7 insertions(+), 16 deletions(-) diff --git a/js/apps/system/_admin/aardvark/APP/frontend/js/templates/dbSelectionView.ejs b/js/apps/system/_admin/aardvark/APP/frontend/js/templates/dbSelectionView.ejs index 56ff1c58f3f2..40c694ccd3bf 100644 --- a/js/apps/system/_admin/aardvark/APP/frontend/js/templates/dbSelectionView.ejs +++ b/js/apps/system/_admin/aardvark/APP/frontend/js/templates/dbSelectionView.ejs @@ -14,7 +14,7 @@ if (list.length > 0) { _.each(list, function(i) { %> <% }); diff --git a/js/apps/system/_admin/aardvark/APP/frontend/js/templates/subNavigationView.ejs b/js/apps/system/_admin/aardvark/APP/frontend/js/templates/subNavigationView.ejs index e2856b6fdb75..fb86b322cef1 100644 --- a/js/apps/system/_admin/aardvark/APP/frontend/js/templates/subNavigationView.ejs +++ b/js/apps/system/_admin/aardvark/APP/frontend/js/templates/subNavigationView.ejs @@ -16,7 +16,7 @@