From b6eb49db35299798edca4ea28ff9b0a583abba48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20G=C3=B6dderz?= Date: Thu, 26 Aug 2021 15:44:17 +0200 Subject: [PATCH 1/5] [3.8] Lower priority of AQL lanes (#14699) * Lower priority of AQL lanes * Added CHANGELOG entry * Improved comments Co-authored-by: Vadim --- CHANGELOG | 4 ++ arangod/Aql/Query.cpp | 2 +- arangod/Aql/RestAqlHandler.cpp | 8 ++++ arangod/Aql/RestAqlHandler.h | 2 +- arangod/Aql/SharedQueryState.cpp | 62 +++++++++++++++-------------- arangod/GeneralServer/RequestLane.h | 19 ++++----- 6 files changed, 57 insertions(+), 40 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index d972f7047b87..80e0a843bd05 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,10 @@ v3.8.1 (XXXX-XX-XX) ------------------- +* Reduce internal priority of AQL execution. This prevents possible deadlocks + with modification operations in a cluster and replicationFactor >= 2, and can + also improve responsiveness under high load of AQL queries. + * Updated arangosync to 2.6.0. * Added protocol specific metrics: histogram about request body size, total diff --git a/arangod/Aql/Query.cpp b/arangod/Aql/Query.cpp index 8f642247dcb4..71d28bf09c69 100644 --- a/arangod/Aql/Query.cpp +++ b/arangod/Aql/Query.cpp @@ -1261,7 +1261,7 @@ futures::Future finishDBServerParts(Query& query, ErrorCode errorCode) { network::RequestOptions options; options.database = query.vocbase().name(); options.timeout = network::Timeout(60.0); // Picked arbitrarily - options.continuationLane = RequestLane::CLUSTER_AQL_CONTINUATION; + options.continuationLane = RequestLane::CLUSTER_AQL_INTERNAL_COORDINATOR; // options.skipScheduler = true; VPackBuffer body; diff --git a/arangod/Aql/RestAqlHandler.cpp b/arangod/Aql/RestAqlHandler.cpp index 38a010dbe00c..32048f0aad7d 100644 --- a/arangod/Aql/RestAqlHandler.cpp +++ b/arangod/Aql/RestAqlHandler.cpp @@ -851,3 +851,11 @@ RestStatus RestAqlHandler::handleFinishQuery(std::string const& idString) { generateResult(rest::ResponseCode::OK, std::move(buffer)); })); } + +RequestLane RestAqlHandler::lane() const { + if (ServerState::instance()->isCoordinator()) { + return RequestLane::CLUSTER_AQL_INTERNAL_COORDINATOR; + } else { + return RequestLane::CLUSTER_AQL; + } +} diff --git a/arangod/Aql/RestAqlHandler.h b/arangod/Aql/RestAqlHandler.h index 380f7b981f79..b890696a5efa 100644 --- a/arangod/Aql/RestAqlHandler.h +++ b/arangod/Aql/RestAqlHandler.h @@ -44,7 +44,7 @@ class RestAqlHandler : public RestVocbaseBaseHandler { public: char const* name() const override final { return "RestAqlHandler"; } - RequestLane lane() const override final { return RequestLane::CLUSTER_AQL; } + RequestLane lane() const override final; RestStatus execute() override; RestStatus continueExecute() override; void shutdownExecute(bool isFinalized) noexcept override; diff --git a/arangod/Aql/SharedQueryState.cpp b/arangod/Aql/SharedQueryState.cpp index 6820efa79dec..2b5d2f61e350 100644 --- a/arangod/Aql/SharedQueryState.cpp +++ b/arangod/Aql/SharedQueryState.cpp @@ -26,6 +26,7 @@ #include "ApplicationFeatures/ApplicationServer.h" #include "Basics/Exceptions.h" #include "Basics/ScopeGuard.h" +#include "Cluster/ServerState.h" #include "RestServer/QueryRegistryFeature.h" #include "Scheduler/Scheduler.h" #include "Scheduler/SchedulerFeature.h" @@ -129,34 +130,37 @@ void SharedQueryState::queueHandler() { return; } - bool queued = - scheduler->queue(RequestLane::CLUSTER_AQL_CONTINUATION, - [self = shared_from_this(), cb = _wakeupCb, v = _cbVersion]() { - std::unique_lock lck(self->_mutex, std::defer_lock); - - do { - bool cntn = false; - try { - cntn = cb(); - } catch (...) { - } - - lck.lock(); - if (v == self->_cbVersion) { - unsigned c = self->_numWakeups--; - TRI_ASSERT(c > 0); - if (c == 1 || !cntn || !self->_valid) { - break; - } - } else { - return; - } - lck.unlock(); - } while (true); - - TRI_ASSERT(lck); - self->queueHandler(); - }); + auto const lane = ServerState::instance()->isCoordinator() + ? RequestLane::CLUSTER_AQL_INTERNAL_COORDINATOR + : RequestLane::CLUSTER_AQL; + + bool queued = scheduler->queue(lane, [self = shared_from_this(), + cb = _wakeupCb, v = _cbVersion]() { + std::unique_lock lck(self->_mutex, std::defer_lock); + + do { + bool cntn = false; + try { + cntn = cb(); + } catch (...) { + } + + lck.lock(); + if (v == self->_cbVersion) { + unsigned c = self->_numWakeups--; + TRI_ASSERT(c > 0); + if (c == 1 || !cntn || !self->_valid) { + break; + } + } else { + return; + } + lck.unlock(); + } while (true); + + TRI_ASSERT(lck); + self->queueHandler(); + }); if (!queued) { // just invalidate _wakeupCb = nullptr; @@ -168,7 +172,7 @@ void SharedQueryState::queueHandler() { bool SharedQueryState::queueAsyncTask(fu2::unique_function cb) { Scheduler* scheduler = SchedulerFeature::SCHEDULER; if (scheduler) { - return scheduler->queue(RequestLane::CLIENT_AQL, std::move(cb)); + return scheduler->queue(RequestLane::CLUSTER_AQL, std::move(cb)); } return false; } diff --git a/arangod/GeneralServer/RequestLane.h b/arangod/GeneralServer/RequestLane.h index ffe1f6223115..fdc5b673ad15 100644 --- a/arangod/GeneralServer/RequestLane.h +++ b/arangod/GeneralServer/RequestLane.h @@ -62,15 +62,16 @@ enum class RequestLane { // V8 or having high priority. CLUSTER_INTERNAL, - // For requests from the DBserver to the Coordinator or - // from the Coordinator to the DBserver. Using AQL - // these have Medium priority. + // Internal AQL requests, or continuations. Low priority. CLUSTER_AQL, - // For future continuations initiated as part of an AQL request from the - // DBserver to the Coordinator or from the Coordinator to the DBserver. - // These have High priority. - CLUSTER_AQL_CONTINUATION, + // For requests from the DBserver to the Coordinator, and continuations on the + // Coordinator. + // These have medium priority. Because client requests made against the + // RestCursorHandler (with lane CLIENT_AQL) might block and need these to + // finish. Ongoing low priority requests can also prevent low priority lanes + // from being worked on, having the same effect. + CLUSTER_AQL_INTERNAL_COORDINATOR, // For requests from the from the Coordinator to the // DBserver using V8. @@ -142,9 +143,9 @@ inline RequestPriority PriorityRequestLane(RequestLane lane) { case RequestLane::CLUSTER_INTERNAL: return RequestPriority::HIGH; case RequestLane::CLUSTER_AQL: + return RequestPriority::LOW; + case RequestLane::CLUSTER_AQL_INTERNAL_COORDINATOR: return RequestPriority::MED; - case RequestLane::CLUSTER_AQL_CONTINUATION: - return RequestPriority::HIGH; case RequestLane::CLUSTER_V8: return RequestPriority::LOW; case RequestLane::CLUSTER_ADMIN: From fc248f727a40584e15404c94ddd3d36f067cad01 Mon Sep 17 00:00:00 2001 From: Jan Date: Thu, 26 Aug 2021 15:45:09 +0200 Subject: [PATCH 2/5] added a test for statistics behavior (#14703) --- .../test-statistics-enabled-false.js | 88 +++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 tests/js/client/server_parameters/test-statistics-enabled-false.js diff --git a/tests/js/client/server_parameters/test-statistics-enabled-false.js b/tests/js/client/server_parameters/test-statistics-enabled-false.js new file mode 100644 index 000000000000..c6e5b1967f0f --- /dev/null +++ b/tests/js/client/server_parameters/test-statistics-enabled-false.js @@ -0,0 +1,88 @@ +/*jshint globalstrict:false, strict:false */ +/* global getOptions, assertEqual, assertTrue, arango */ + +//////////////////////////////////////////////////////////////////////////////// +/// @brief test for security-related server options +/// +/// @file +/// +/// DISCLAIMER +/// +/// Copyright 2010-2012 triagens GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB Inc, Cologne, Germany +/// +/// @author Jan Steemann +/// @author Copyright 2019, ArangoDB Inc, Cologne, Germany +//////////////////////////////////////////////////////////////////////////////// + +if (getOptions === true) { + return { + 'server.statistics': "false", + 'server.export-metrics-api': "true", + }; +} + +const jsunity = require('jsunity'); + +function testSuite() { + return { + testGetAdminStatistics : function() { + let res = arango.GET("/_admin/statistics"); + assertTrue(res.error); + assertEqual(404, res.code); + }, + + testGetAdminStatisticsDescription : function() { + let res = arango.GET("/_admin/statistics-description"); + assertTrue(res.error); + assertEqual(404, res.code); + }, + + testGetMetrics : function() { + let metrics = {}; + String(arango.GET("/_admin/metrics/v2")) + .split("\n") + .filter((line) => line.match(/^[^#]/)) + .filter((line) => line.match(/^arangodb_/)) + .forEach((line) => { + let name = line.replace(/[ \{].*$/g, ''); + let value = Number(line.replace(/^.+ (\d+)$/, '$1')); + metrics[name] = value; + }); + + const expected = [ + "arangodb_process_statistics_minor_page_faults_total", + "arangodb_process_statistics_major_page_faults_total", + "arangodb_process_statistics_user_time", + "arangodb_process_statistics_system_time", + "arangodb_process_statistics_number_of_threads", + "arangodb_process_statistics_resident_set_size", + "arangodb_process_statistics_resident_set_size_percent", + "arangodb_process_statistics_virtual_memory_size", + "arangodb_server_statistics_physical_memory", + "arangodb_server_statistics_server_uptime_total", + ]; + + expected.forEach((name) => { + assertTrue(metrics.hasOwnProperty(name), name); + assertEqual("number", typeof metrics[name], name); + }); + }, + }; +} + +jsunity.run(testSuite); +return jsunity.done(); From 24bc4c20098ab0e5a6f53a6867048f1f81b9e507 Mon Sep 17 00:00:00 2001 From: Jan Date: Thu, 26 Aug 2021 15:45:42 +0200 Subject: [PATCH 3/5] properly rename test file (#14705) --- ...istics-spec-nocluster.js => api-statistics-spec-noncluster.js} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/js/client/shell/{api-statistics-spec-nocluster.js => api-statistics-spec-noncluster.js} (100%) diff --git a/tests/js/client/shell/api-statistics-spec-nocluster.js b/tests/js/client/shell/api-statistics-spec-noncluster.js similarity index 100% rename from tests/js/client/shell/api-statistics-spec-nocluster.js rename to tests/js/client/shell/api-statistics-spec-noncluster.js From 46e47320379b62599a34e001eaefa2c0a51e7f1c Mon Sep 17 00:00:00 2001 From: Andrei Lobov Date: Wed, 1 Sep 2021 22:05:06 +0300 Subject: [PATCH 4/5] Bug fix/search 238 late materialization with constrained heap (#14722) * add test for view case * find a better place for sort * fix formatting * add index case test * moar tests * temporary relaxed assertion --- CHANGELOG | 3 ++ arangod/Aql/OptimizerRules.cpp | 28 +++++++++++++++++-- ...-document-materialization-arangosearch.inc | 28 +++++++++++++++++++ ...izer-rule-late-document-materialization.js | 25 +++++++++++++++++ .../aql-queries-optimizer-limit-cluster.js | 12 +++++++- 5 files changed, 93 insertions(+), 3 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 80e0a843bd05..aee2e70e1d7b 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,9 @@ v3.8.1 (XXXX-XX-XX) ------------------- +* SEARCH-238: Improved SortNodes placement optimization in cluster so + late materialization could cover more cases + * Reduce internal priority of AQL execution. This prevents possible deadlocks with modification operations in a cluster and replicationFactor >= 2, and can also improve responsiveness under high load of AQL queries. diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index 9f5d8cb82bd4..d4d18c1d53c6 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -4870,6 +4870,7 @@ void arangodb::aql::distributeSortToClusterRule(Optimizer* opt, OptimizerRule const& rule) { ::arangodb::containers::SmallVector::allocator_type::arena_type a; ::arangodb::containers::SmallVector nodes{a}; + VarSet usedBySort; plan->findNodesOfType(nodes, EN::GATHER, true); bool modified = false; @@ -4931,14 +4932,37 @@ void arangodb::aql::distributeSortToClusterRule(Optimizer* opt, case EN::SORT: { auto thisSortNode = ExecutionNode::castTo(inspectNode); - + usedBySort.clear(); + thisSortNode->getVariablesUsedHere(usedBySort); // remember our cursor... parents = inspectNode->getParents(); // then unlink the filter/calculator from the plan plan->unlinkNode(inspectNode); // and re-insert into plan in front of the remoteNode if (thisSortNode->_reinsertInCluster) { - plan->insertDependency(rn, inspectNode); + // let's look for the best place for that SORT. + // We could skip over several calculations if + // they are not needed for our sort. So we could calculate + // more lazily and even make late materialization possible + ExecutionNode* insertPoint = rn; + auto current = insertPoint->getFirstDependency(); + while (current != nullptr && + current->getType() == EN::CALCULATION) { + auto nn = ExecutionNode::castTo(current); + if (!nn->expression()->isDeterministic()) { + // let's not touch non-deterministic calculation + // as results may depend on calls count and sort could change this + break; + } + auto variable = nn->outVariable(); + if (usedBySort.find(variable) == usedBySort.end()) { + insertPoint = current; + } else { + break; // first node used by sort. We should stop here. + } + current = current->getFirstDependency(); + } + plan->insertDependency(insertPoint, inspectNode); } gatherNode->elements(thisSortNode->elements()); diff --git a/tests/js/server/aql/aql-optimizer-rule-late-document-materialization-arangosearch.inc b/tests/js/server/aql/aql-optimizer-rule-late-document-materialization-arangosearch.inc index 7124660ee827..5161f8e3e7a8 100644 --- a/tests/js/server/aql/aql-optimizer-rule-late-document-materialization-arangosearch.inc +++ b/tests/js/server/aql/aql-optimizer-rule-late-document-materialization-arangosearch.inc @@ -380,6 +380,34 @@ }); assertEqual(0, expected.size); }, + testConstrainedSortOnDbServer() { + let query = "FOR d IN " + vn + " SEARCH d.value > 9 SORT BM25(d) LIMIT 10 RETURN {key: d._key, value: d.value}"; + let plan = AQL_EXPLAIN(query).plan; + assertNotEqual(-1, plan.rules.indexOf(ruleName)); + let materializeNodeFound = false; + let nodeDependency = null; + plan.nodes.forEach(function(node) { + if( node.type === "MaterializeNode") { + assertFalse(materializeNodeFound); + // FIXME: temporary relaxed assertion + // We need to fix node placement in oneshard mode + // first. And than we indeed will get expected node placement + // assertEqual(nodeDependency.type, isCluster ? "SortNode" : "LimitNode"); + assertTrue(nodeDependency.type === "SortNode" || nodeDependency.type === "LimitNode"); + materializeNodeFound = true; + } + nodeDependency = node; + }); + assertTrue(materializeNodeFound); + let result = AQL_EXECUTE(query); + assertEqual(4, result.json.length); + let expected = new Set(['c_0', 'c_1', 'c_2', 'c_3']); + result.json.forEach(function(doc) { + assertTrue(expected.has(doc.key)); + expected.delete(doc.key); + }); + assertEqual(0, expected.size); + } }; }; }()); diff --git a/tests/js/server/aql/aql-optimizer-rule-late-document-materialization.js b/tests/js/server/aql/aql-optimizer-rule-late-document-materialization.js index 77c088bc65be..2fc51cb7946a 100644 --- a/tests/js/server/aql/aql-optimizer-rule-late-document-materialization.js +++ b/tests/js/server/aql/aql-optimizer-rule-late-document-materialization.js @@ -607,6 +607,31 @@ function lateDocumentMaterializationRuleTestSuite () { let result = AQL_EXECUTE(query); assertEqual(1, result.json.length); assertEqual(result.json[0]._key, 'c0'); + }, + testConstrainedSortOnDbServer() { + let query = "FOR d IN " + prefixIndexCollectionName + " FILTER d.obj.b == {sb: 'b_val_0'} " + + "SORT d.obj.b LIMIT 10 RETURN {key: d._key, value: d.some_value_from_doc}"; + let plan = AQL_EXPLAIN(query).plan; + assertNotEqual(-1, plan.rules.indexOf(ruleName)); + let materializeNodeFound = false; + let nodeDependency = null; + plan.nodes.forEach(function(node) { + if( node.type === "MaterializeNode") { + assertFalse(materializeNodeFound); + assertEqual(nodeDependency.type, isCluster ? "SortNode" : "LimitNode"); + materializeNodeFound = true; + } + nodeDependency = node; + }); + assertTrue(materializeNodeFound); + let result = AQL_EXECUTE(query); + assertEqual(1, result.json.length); + let expected = new Set(['c0']); + result.json.forEach(function(doc) { + assertTrue(expected.has(doc.key)); + expected.delete(doc.key); + }); + assertEqual(0, expected.size); } }; } diff --git a/tests/js/server/aql/aql-queries-optimizer-limit-cluster.js b/tests/js/server/aql/aql-queries-optimizer-limit-cluster.js index 131918524cab..d7aef1c19b3f 100644 --- a/tests/js/server/aql/aql-queries-optimizer-limit-cluster.js +++ b/tests/js/server/aql/aql-queries-optimizer-limit-cluster.js @@ -601,7 +601,17 @@ function ahuacatlQueryOptimizerLimitTestSuite () { var actual = getQueryResults(query); assertEqual(0, actual.length); - assertEqual([ "SingletonNode", "EnumerateCollectionNode", "CalculationNode", "CalculationNode", "SortNode", "RemoteNode", "GatherNode", "LimitNode", "FilterNode", "ReturnNode" ], explain(query)); + assertEqual([ "SingletonNode", "EnumerateCollectionNode", "CalculationNode", "SortNode", "CalculationNode", + "RemoteNode", "GatherNode", "LimitNode", "FilterNode", "ReturnNode" ], explain(query)); + }, + testLimitFullCollectionSort3_DoubleCalculation : function () { + var query = "FOR c IN " + cn + " SORT c.value LIMIT 0, 10 FILTER c.value >= 20 && c.value < 30 RETURN c.value2"; + + var actual = getQueryResults(query); + assertEqual(0, actual.length); + + assertEqual([ "SingletonNode", "EnumerateCollectionNode", "CalculationNode", "SortNode", "CalculationNode", + "CalculationNode", "RemoteNode", "GatherNode", "LimitNode", "FilterNode", "ReturnNode" ], explain(query)); }, //////////////////////////////////////////////////////////////////////////////// From fe596923f727e5a857c75937c07044fe86636dc1 Mon Sep 17 00:00:00 2001 From: Vadim Date: Fri, 3 Sep 2021 20:16:24 +0300 Subject: [PATCH 5/5] Update CHANGELOG --- CHANGELOG | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index aee2e70e1d7b..84c194e9353e 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,12 +1,12 @@ -v3.8.1 (XXXX-XX-XX) +v3.8.2 (XXXX-XX-XX) ------------------- -* SEARCH-238: Improved SortNodes placement optimization in cluster so - late materialization could cover more cases +* SEARCH-238: Improved SortNodes placement optimization in cluster so late + materialization could cover more cases. + -* Reduce internal priority of AQL execution. This prevents possible deadlocks - with modification operations in a cluster and replicationFactor >= 2, and can - also improve responsiveness under high load of AQL queries. +v3.8.1 (XXXX-XX-XX) +------------------- * Updated arangosync to 2.6.0.