From b6eb49db35299798edca4ea28ff9b0a583abba48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20G=C3=B6dderz?= Date: Thu, 26 Aug 2021 15:44:17 +0200 Subject: [PATCH 1/5] [3.8] Lower priority of AQL lanes (#14699) * Lower priority of AQL lanes * Added CHANGELOG entry * Improved comments Co-authored-by: Vadim --- CHANGELOG | 4 ++ arangod/Aql/Query.cpp | 2 +- arangod/Aql/RestAqlHandler.cpp | 8 ++++ arangod/Aql/RestAqlHandler.h | 2 +- arangod/Aql/SharedQueryState.cpp | 62 +++++++++++++++-------------- arangod/GeneralServer/RequestLane.h | 19 ++++----- 6 files changed, 57 insertions(+), 40 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index d972f7047b87..80e0a843bd05 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,10 @@ v3.8.1 (XXXX-XX-XX) ------------------- +* Reduce internal priority of AQL execution. This prevents possible deadlocks + with modification operations in a cluster and replicationFactor >= 2, and can + also improve responsiveness under high load of AQL queries. + * Updated arangosync to 2.6.0. * Added protocol specific metrics: histogram about request body size, total diff --git a/arangod/Aql/Query.cpp b/arangod/Aql/Query.cpp index 8f642247dcb4..71d28bf09c69 100644 --- a/arangod/Aql/Query.cpp +++ b/arangod/Aql/Query.cpp @@ -1261,7 +1261,7 @@ futures::Future finishDBServerParts(Query& query, ErrorCode errorCode) { network::RequestOptions options; options.database = query.vocbase().name(); options.timeout = network::Timeout(60.0); // Picked arbitrarily - options.continuationLane = RequestLane::CLUSTER_AQL_CONTINUATION; + options.continuationLane = RequestLane::CLUSTER_AQL_INTERNAL_COORDINATOR; // options.skipScheduler = true; VPackBuffer body; diff --git a/arangod/Aql/RestAqlHandler.cpp b/arangod/Aql/RestAqlHandler.cpp index 38a010dbe00c..32048f0aad7d 100644 --- a/arangod/Aql/RestAqlHandler.cpp +++ b/arangod/Aql/RestAqlHandler.cpp @@ -851,3 +851,11 @@ RestStatus RestAqlHandler::handleFinishQuery(std::string const& idString) { generateResult(rest::ResponseCode::OK, std::move(buffer)); })); } + +RequestLane RestAqlHandler::lane() const { + if (ServerState::instance()->isCoordinator()) { + return RequestLane::CLUSTER_AQL_INTERNAL_COORDINATOR; + } else { + return RequestLane::CLUSTER_AQL; + } +} diff --git a/arangod/Aql/RestAqlHandler.h b/arangod/Aql/RestAqlHandler.h index 380f7b981f79..b890696a5efa 100644 --- a/arangod/Aql/RestAqlHandler.h +++ b/arangod/Aql/RestAqlHandler.h @@ -44,7 +44,7 @@ class RestAqlHandler : public RestVocbaseBaseHandler { public: char const* name() const override final { return "RestAqlHandler"; } - RequestLane lane() const override final { return RequestLane::CLUSTER_AQL; } + RequestLane lane() const override final; RestStatus execute() override; RestStatus continueExecute() override; void shutdownExecute(bool isFinalized) noexcept override; diff --git a/arangod/Aql/SharedQueryState.cpp b/arangod/Aql/SharedQueryState.cpp index 6820efa79dec..2b5d2f61e350 100644 --- a/arangod/Aql/SharedQueryState.cpp +++ b/arangod/Aql/SharedQueryState.cpp @@ -26,6 +26,7 @@ #include "ApplicationFeatures/ApplicationServer.h" #include "Basics/Exceptions.h" #include "Basics/ScopeGuard.h" +#include "Cluster/ServerState.h" #include "RestServer/QueryRegistryFeature.h" #include "Scheduler/Scheduler.h" #include "Scheduler/SchedulerFeature.h" @@ -129,34 +130,37 @@ void SharedQueryState::queueHandler() { return; } - bool queued = - scheduler->queue(RequestLane::CLUSTER_AQL_CONTINUATION, - [self = shared_from_this(), cb = _wakeupCb, v = _cbVersion]() { - std::unique_lock lck(self->_mutex, std::defer_lock); - - do { - bool cntn = false; - try { - cntn = cb(); - } catch (...) { - } - - lck.lock(); - if (v == self->_cbVersion) { - unsigned c = self->_numWakeups--; - TRI_ASSERT(c > 0); - if (c == 1 || !cntn || !self->_valid) { - break; - } - } else { - return; - } - lck.unlock(); - } while (true); - - TRI_ASSERT(lck); - self->queueHandler(); - }); + auto const lane = ServerState::instance()->isCoordinator() + ? RequestLane::CLUSTER_AQL_INTERNAL_COORDINATOR + : RequestLane::CLUSTER_AQL; + + bool queued = scheduler->queue(lane, [self = shared_from_this(), + cb = _wakeupCb, v = _cbVersion]() { + std::unique_lock lck(self->_mutex, std::defer_lock); + + do { + bool cntn = false; + try { + cntn = cb(); + } catch (...) { + } + + lck.lock(); + if (v == self->_cbVersion) { + unsigned c = self->_numWakeups--; + TRI_ASSERT(c > 0); + if (c == 1 || !cntn || !self->_valid) { + break; + } + } else { + return; + } + lck.unlock(); + } while (true); + + TRI_ASSERT(lck); + self->queueHandler(); + }); if (!queued) { // just invalidate _wakeupCb = nullptr; @@ -168,7 +172,7 @@ void SharedQueryState::queueHandler() { bool SharedQueryState::queueAsyncTask(fu2::unique_function cb) { Scheduler* scheduler = SchedulerFeature::SCHEDULER; if (scheduler) { - return scheduler->queue(RequestLane::CLIENT_AQL, std::move(cb)); + return scheduler->queue(RequestLane::CLUSTER_AQL, std::move(cb)); } return false; } diff --git a/arangod/GeneralServer/RequestLane.h b/arangod/GeneralServer/RequestLane.h index ffe1f6223115..fdc5b673ad15 100644 --- a/arangod/GeneralServer/RequestLane.h +++ b/arangod/GeneralServer/RequestLane.h @@ -62,15 +62,16 @@ enum class RequestLane { // V8 or having high priority. CLUSTER_INTERNAL, - // For requests from the DBserver to the Coordinator or - // from the Coordinator to the DBserver. Using AQL - // these have Medium priority. + // Internal AQL requests, or continuations. Low priority. CLUSTER_AQL, - // For future continuations initiated as part of an AQL request from the - // DBserver to the Coordinator or from the Coordinator to the DBserver. - // These have High priority. - CLUSTER_AQL_CONTINUATION, + // For requests from the DBserver to the Coordinator, and continuations on the + // Coordinator. + // These have medium priority. Because client requests made against the + // RestCursorHandler (with lane CLIENT_AQL) might block and need these to + // finish. Ongoing low priority requests can also prevent low priority lanes + // from being worked on, having the same effect. + CLUSTER_AQL_INTERNAL_COORDINATOR, // For requests from the from the Coordinator to the // DBserver using V8. @@ -142,9 +143,9 @@ inline RequestPriority PriorityRequestLane(RequestLane lane) { case RequestLane::CLUSTER_INTERNAL: return RequestPriority::HIGH; case RequestLane::CLUSTER_AQL: + return RequestPriority::LOW; + case RequestLane::CLUSTER_AQL_INTERNAL_COORDINATOR: return RequestPriority::MED; - case RequestLane::CLUSTER_AQL_CONTINUATION: - return RequestPriority::HIGH; case RequestLane::CLUSTER_V8: return RequestPriority::LOW; case RequestLane::CLUSTER_ADMIN: From fc248f727a40584e15404c94ddd3d36f067cad01 Mon Sep 17 00:00:00 2001 From: Jan Date: Thu, 26 Aug 2021 15:45:09 +0200 Subject: [PATCH 2/5] added a test for statistics behavior (#14703) --- .../test-statistics-enabled-false.js | 88 +++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 tests/js/client/server_parameters/test-statistics-enabled-false.js diff --git a/tests/js/client/server_parameters/test-statistics-enabled-false.js b/tests/js/client/server_parameters/test-statistics-enabled-false.js new file mode 100644 index 000000000000..c6e5b1967f0f --- /dev/null +++ b/tests/js/client/server_parameters/test-statistics-enabled-false.js @@ -0,0 +1,88 @@ +/*jshint globalstrict:false, strict:false */ +/* global getOptions, assertEqual, assertTrue, arango */ + +//////////////////////////////////////////////////////////////////////////////// +/// @brief test for security-related server options +/// +/// @file +/// +/// DISCLAIMER +/// +/// Copyright 2010-2012 triagens GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB Inc, Cologne, Germany +/// +/// @author Jan Steemann +/// @author Copyright 2019, ArangoDB Inc, Cologne, Germany +//////////////////////////////////////////////////////////////////////////////// + +if (getOptions === true) { + return { + 'server.statistics': "false", + 'server.export-metrics-api': "true", + }; +} + +const jsunity = require('jsunity'); + +function testSuite() { + return { + testGetAdminStatistics : function() { + let res = arango.GET("/_admin/statistics"); + assertTrue(res.error); + assertEqual(404, res.code); + }, + + testGetAdminStatisticsDescription : function() { + let res = arango.GET("/_admin/statistics-description"); + assertTrue(res.error); + assertEqual(404, res.code); + }, + + testGetMetrics : function() { + let metrics = {}; + String(arango.GET("/_admin/metrics/v2")) + .split("\n") + .filter((line) => line.match(/^[^#]/)) + .filter((line) => line.match(/^arangodb_/)) + .forEach((line) => { + let name = line.replace(/[ \{].*$/g, ''); + let value = Number(line.replace(/^.+ (\d+)$/, '$1')); + metrics[name] = value; + }); + + const expected = [ + "arangodb_process_statistics_minor_page_faults_total", + "arangodb_process_statistics_major_page_faults_total", + "arangodb_process_statistics_user_time", + "arangodb_process_statistics_system_time", + "arangodb_process_statistics_number_of_threads", + "arangodb_process_statistics_resident_set_size", + "arangodb_process_statistics_resident_set_size_percent", + "arangodb_process_statistics_virtual_memory_size", + "arangodb_server_statistics_physical_memory", + "arangodb_server_statistics_server_uptime_total", + ]; + + expected.forEach((name) => { + assertTrue(metrics.hasOwnProperty(name), name); + assertEqual("number", typeof metrics[name], name); + }); + }, + }; +} + +jsunity.run(testSuite); +return jsunity.done(); From 24bc4c20098ab0e5a6f53a6867048f1f81b9e507 Mon Sep 17 00:00:00 2001 From: Jan Date: Thu, 26 Aug 2021 15:45:42 +0200 Subject: [PATCH 3/5] properly rename test file (#14705) --- ...istics-spec-nocluster.js => api-statistics-spec-noncluster.js} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/js/client/shell/{api-statistics-spec-nocluster.js => api-statistics-spec-noncluster.js} (100%) diff --git a/tests/js/client/shell/api-statistics-spec-nocluster.js b/tests/js/client/shell/api-statistics-spec-noncluster.js similarity index 100% rename from tests/js/client/shell/api-statistics-spec-nocluster.js rename to tests/js/client/shell/api-statistics-spec-noncluster.js From dc105d6e50dc2d41f6e1e0d94939b7fadc7e747e Mon Sep 17 00:00:00 2001 From: Max Neunhoeffer Date: Wed, 15 Sep 2021 16:44:29 +0200 Subject: [PATCH 4/5] Backport fix to 3.8. --- CHANGELOG | 2 + arangod/RocksDBEngine/RocksDBVPackIndex.cpp | 9 +++ .../shell/shell-array-index-noncluster.js | 70 ++++++++++++++++++- 3 files changed, 80 insertions(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index e1bee5021aca..44d061b98934 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,8 @@ v3.8.2 (XXXX-XX-XX) ------------------- +* Fixed a bug for array indexes on update of documents. See BTS-548. + * Fix active failover, so that the new host actually has working foxx services. (BTS-558) diff --git a/arangod/RocksDBEngine/RocksDBVPackIndex.cpp b/arangod/RocksDBEngine/RocksDBVPackIndex.cpp index 943d04234fbf..4aa65f6b2a39 100644 --- a/arangod/RocksDBEngine/RocksDBVPackIndex.cpp +++ b/arangod/RocksDBEngine/RocksDBVPackIndex.cpp @@ -989,6 +989,15 @@ namespace { if (begin->shouldExpand && first.isArray() && second.isArray()) { + if (first.length() != second.length()) { + // Nonequal length, so there is a difference! + // We have to play this carefully here. It is possible that the + // set of values found is the same, but we must err on the side + // of caution in this case and use the slow path. Note in + // particular that the following code returns `true`, if one + // of the arrays is empty, which is not correct! + return false; + } auto next = begin + 1; VPackArrayIterator it1(first), it2(second); while (it1.valid() && it2.valid()) { diff --git a/tests/js/server/shell/shell-array-index-noncluster.js b/tests/js/server/shell/shell-array-index-noncluster.js index b575adde7211..6831e6747b3e 100644 --- a/tests/js/server/shell/shell-array-index-noncluster.js +++ b/tests/js/server/shell/shell-array-index-noncluster.js @@ -275,7 +275,75 @@ function arraySkiplistIndexSuite () { catch (err) { assertEqual(errors.ERROR_ARANGO_UNIQUE_CONSTRAINT_VIOLATED.code, err.errorNum); } - } + }, + +//////////////////////////////////////////////////////////////////////////////// +/// @brief test: Test update of an array index where entries are removed: +//////////////////////////////////////////////////////////////////////////////// + + testPersistentArrayIndexUpdates : function () { + collection.ensureIndex({type:"persistent", fields: ["a[*].b"], unique: true}); + + let meta = collection.insert({a: [{b:"xyz"}]}); + + try { + collection.insert({a: [{b:"xyz"}]}); + fail(); + } + catch (err) { + assertEqual(errors.ERROR_ARANGO_UNIQUE_CONSTRAINT_VIOLATED.code, err.errorNum); + } + + collection.update(meta._key, {a: []}); + + let meta2 = collection.insert({a: [{b:"xyz"}]}); // must work again + + try { + collection.insert({a: [{b:"xyz"}]}); + fail(); + } + catch (err) { + assertEqual(errors.ERROR_ARANGO_UNIQUE_CONSTRAINT_VIOLATED.code, err.errorNum); + } + + collection.replace(meta2._key, {a: []}); + + collection.insert({a: [{b:"xyz"}]}); // must work again + }, + +//////////////////////////////////////////////////////////////////////////////// +/// @brief test: Test update of an array index where entries are changed: +//////////////////////////////////////////////////////////////////////////////// + + testPersistentArrayIndexUpdates2 : function () { + collection.ensureIndex({type:"persistent", fields: ["a[*].b"], unique: true}); + + let meta = collection.insert({a: [{b:"xyz"}]}); + + try { + collection.insert({a: [{b:"xyz"}]}); + fail(); + } + catch (err) { + assertEqual(errors.ERROR_ARANGO_UNIQUE_CONSTRAINT_VIOLATED.code, err.errorNum); + } + + collection.update(meta._key, {a: [{b:"123"}]}); + + let meta2 = collection.insert({a: [{b:"xyz"}]}); // must work again + + try { + collection.insert({a: [{b:"xyz"}]}); + fail(); + } + catch (err) { + assertEqual(errors.ERROR_ARANGO_UNIQUE_CONSTRAINT_VIOLATED.code, err.errorNum); + } + + collection.replace(meta2._key, {a: [{b:"456"}]}); + + collection.insert({a: [{b:"xyz"}]}); // must work again + }, }; } From 88274486f197662518a05fef093470d7ce78ffbe Mon Sep 17 00:00:00 2001 From: Max Neunhoeffer Date: Wed, 15 Sep 2021 16:47:24 +0200 Subject: [PATCH 5/5] Strange merge artifact rectified. --- CHANGELOG | 4 ---- 1 file changed, 4 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 44d061b98934..647283d78881 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -100,10 +100,6 @@ v3.8.2 (XXXX-XX-XX) v3.8.1 (2021-08-27) ------------------- -* Reduce internal priority of AQL execution. This prevents possible deadlocks - with modification operations in a cluster and replicationFactor >= 2, and can - also improve responsiveness under high load of AQL queries. - * Updated arangosync to 2.6.0. * Added protocol specific metrics: histogram about request body size, total