From 958976ec9d73346720ce74ca70c00195bd93c0ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20G=C3=B6dderz?= Date: Thu, 10 Dec 2020 14:17:46 +0100 Subject: [PATCH 1/6] Fixed AR-113 and added regression tests --- arangod/Aql/OptimizerRules.cpp | 29 ++++++--- arangod/Cluster/RestClusterHandler.cpp | 2 - tests/js/server/aql/aql-optimizer-geoindex.js | 59 ++++++++++++++++++- 3 files changed, 79 insertions(+), 11 deletions(-) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index c85be5c74a5a..cd0623a1568a 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -6484,8 +6484,17 @@ void arangodb::aql::inlineSubqueriesRule(Optimizer* opt, std::unique_ptraddPlan(std::move(plan), rule, modified); } -static bool isValueOrReference(AstNode const* node) { - return node->type == NODE_TYPE_VALUE || node->type == NODE_TYPE_REFERENCE; +static bool isValueOrAccessibleReference(VarSet const& validVars, AstNode const* node) { + switch (node->type) { + case NODE_TYPE_VALUE: + return true; + case NODE_TYPE_REFERENCE: { + auto const* const variable = static_cast(node->getData()); + return validVars.contains(variable); + } + default: + return false; + } } /// Essentially mirrors the geo::QueryParams struct, but with @@ -6787,11 +6796,12 @@ static bool checkGeoFilterFunction(ExecutionPlan* plan, AstNode const* funcNode, // checks if a node contanis a geo index function a valid operator // to use within a filter condition -bool checkGeoFilterExpression(ExecutionPlan* plan, AstNode const* node, GeoIndexInfo& info) { +bool checkGeoFilterExpression(ExecutionPlan* plan, VarSet const& validVars, + AstNode const* node, GeoIndexInfo& info) { // checks @first `smaller` @second // note: this only modifies "info" if the function returns true auto eval = [&](AstNode const* first, AstNode const* second, bool lessequal) -> bool { - if (isValueOrReference(second) && // no attribute access + if (isValueOrAccessibleReference(validVars, second) && // no attribute access info.maxDistanceExpr == nullptr && // max distance is not yet set checkDistanceFunc(plan, first, /*legacy*/ true, info)) { TRI_ASSERT(info.index); @@ -6799,7 +6809,7 @@ bool checkGeoFilterExpression(ExecutionPlan* plan, AstNode const* node, GeoIndex info.maxInclusive = info.maxInclusive && lessequal; info.nodesToRemove.insert(node); return true; - } else if (isValueOrReference(first) && // no attribute access + } else if (isValueOrAccessibleReference(validVars, first) && // no attribute access info.minDistanceExpr == nullptr && // min distance is not yet set checkDistanceFunc(plan, second, /*legacy*/ true, info)) { info.minDistanceExpr = first; @@ -6879,7 +6889,8 @@ static bool optimizeSortNode(ExecutionPlan* plan, SortNode* sort, GeoIndexInfo& } // checks a single sort or filter node -static void optimizeFilterNode(ExecutionPlan* plan, FilterNode* fn, GeoIndexInfo& info) { +static void optimizeFilterNode(ExecutionPlan* plan, VarSet const& validVars, + FilterNode* fn, GeoIndexInfo& info) { TRI_ASSERT(fn->getType() == EN::FILTER); // filter nodes always have one input variable @@ -6909,7 +6920,7 @@ static void optimizeFilterNode(ExecutionPlan* plan, FilterNode* fn, GeoIndexInfo if (!node->isSimpleComparisonOperator() && node->type != arangodb::aql::NODE_TYPE_FCALL) { return; } - if (checkGeoFilterExpression(plan, node, info)) { + if (checkGeoFilterExpression(plan, validVars, node, info)) { info.exesToModify.try_emplace(fn, expr); } }); @@ -7110,11 +7121,13 @@ void arangodb::aql::geoIndexRule(Optimizer* opt, std::unique_ptr ExecutionNode* current = node->getFirstParent(); LimitNode* limit = nullptr; bool canUseSortLimit = true; + auto const& validVars = node->getVarsValid(); while (current) { if (current->getType() == EN::FILTER) { // picking up filter conditions is always allowed - optimizeFilterNode(plan.get(), ExecutionNode::castTo(current), info); + optimizeFilterNode(plan.get(), validVars, + ExecutionNode::castTo(current), info); } else if (current->getType() == EN::SORT && canUseSortLimit) { // only pick up a sort clause if we haven't seen another loop yet if (!optimizeSortNode(plan.get(), ExecutionNode::castTo(current), info)) { diff --git a/arangod/Cluster/RestClusterHandler.cpp b/arangod/Cluster/RestClusterHandler.cpp index eda8214b6e67..f4656da3982a 100644 --- a/arangod/Cluster/RestClusterHandler.cpp +++ b/arangod/Cluster/RestClusterHandler.cpp @@ -153,9 +153,7 @@ void RestClusterHandler::handleClusterInfo() { auto& ci = server().getFeature().clusterInfo(); auto dump = ci.toVelocyPack(); - LOG_DEVEL << dump.toJson(); generateResult(rest::ResponseCode::OK, dump.slice()); - } /// @brief returns information about all coordinator endpoints diff --git a/tests/js/server/aql/aql-optimizer-geoindex.js b/tests/js/server/aql/aql-optimizer-geoindex.js index 5f6b567c3534..289ab3aece64 100644 --- a/tests/js/server/aql/aql-optimizer-geoindex.js +++ b/tests/js/server/aql/aql-optimizer-geoindex.js @@ -1029,7 +1029,64 @@ function optimizerRuleTestSuite() { var result = AQL_EXPLAIN(query.string, query.bindVars); hasIndexNode(result, query); hasNoFilterNode(result, query); - } + }, + + // Regression test (https://arangodb.atlassian.net/browse/AR-113): + // Previously, the filter was moved into the index, but one access of `dist` in `dist < dist` was left, while the + // variable obviously can't be available yet at the index node. + // When failing, throws an error like + // missing variable #1 (dist) for node #8 (IndexNode) while planning registers + testSelfReference1: function () { + const query = { + string: ` + FOR doc IN @@col + LET dist = GEO_DISTANCE(doc.geo, [16, 53]) + FILTER dist < dist + RETURN {doc, dist} + `, + bindVars: {'@col': colName}, + }; + + const result = AQL_EXPLAIN(query.string, query.bindVars); + hasNoIndexNode(result, query); + hasFilterNode(result, query); + }, + + // See testSelfReference1 + testSelfReference2: function () { + const query = { + string: ` + FOR doc IN @@col + LET dist = GEO_DISTANCE(doc.geo, [16, 53]) + LET dist2 = GEO_DISTANCE(doc.geo, [3, 7]) + FILTER dist < dist2 + RETURN {doc, dist, dist2} + `, + bindVars: {'@col': colName}, + }; + + const result = AQL_EXPLAIN(query.string, query.bindVars); + hasNoIndexNode(result, query); + hasFilterNode(result, query); + }, + + // See testSelfReference1 + testSelfReference3: function () { + const query = { + string: ` + FOR doc IN @@col + LET dist = GEO_DISTANCE(doc.geo, [16, 53]) + LET dist2 = doc.maxDist + FILTER dist < dist2 + RETURN {doc, dist, dist2} + `, + bindVars: {'@col': colName}, + }; + + const result = AQL_EXPLAIN(query.string, query.bindVars); + hasNoIndexNode(result, query); + hasFilterNode(result, query); + }, }; // test dictionary (return) } // optimizerRuleTestSuite From cf4a4136bf1c9b587ca63ee89bc43cadeea6061f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20G=C3=B6dderz?= Date: Fri, 11 Dec 2020 09:35:44 +0100 Subject: [PATCH 2/6] Updated CHANGELOG --- CHANGELOG | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index a40d9e637fbd..f9adb57fa896 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,10 @@ devel ----- +* Fix AR-113. Add a check in the AQL geo-index-optimizer rule that vars + referenced by a candidate index expression are actually available at the + candidate index node. + * Make all Pregel HTTP and JavaScript APIs also accept stringified execution number values, in addition to numeric ones. From cadc8e6d86a54ae7027e4f0c0bd078c063c1ff82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20G=C3=B6dderz?= Date: Mon, 14 Dec 2020 10:47:45 +0100 Subject: [PATCH 3/6] Throw an exception in case of an unexpected type --- arangod/Aql/OptimizerRules.cpp | 8 ++++---- arangod/GeoIndex/Index.cpp | 6 ++++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index cd0623a1568a..f8a3010f2a0f 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -6801,16 +6801,16 @@ bool checkGeoFilterExpression(ExecutionPlan* plan, VarSet const& validVars, // checks @first `smaller` @second // note: this only modifies "info" if the function returns true auto eval = [&](AstNode const* first, AstNode const* second, bool lessequal) -> bool { - if (isValueOrAccessibleReference(validVars, second) && // no attribute access - info.maxDistanceExpr == nullptr && // max distance is not yet set + if (isValueOrAccessibleReference(validVars, second) && // no attribute access + info.maxDistanceExpr == nullptr && // max distance is not yet set checkDistanceFunc(plan, first, /*legacy*/ true, info)) { TRI_ASSERT(info.index); info.maxDistanceExpr = second; info.maxInclusive = info.maxInclusive && lessequal; info.nodesToRemove.insert(node); return true; - } else if (isValueOrAccessibleReference(validVars, first) && // no attribute access - info.minDistanceExpr == nullptr && // min distance is not yet set + } else if (isValueOrAccessibleReference(validVars, first) && // no attribute access + info.minDistanceExpr == nullptr && // min distance is not yet set checkDistanceFunc(plan, second, /*legacy*/ true, info)) { info.minDistanceExpr = first; info.minInclusive = info.minInclusive && lessequal; diff --git a/arangod/GeoIndex/Index.cpp b/arangod/GeoIndex/Index.cpp index 68a721011d8d..ac5791c305d4 100644 --- a/arangod/GeoIndex/Index.cpp +++ b/arangod/GeoIndex/Index.cpp @@ -260,6 +260,9 @@ void Index::handleNode(aql::AstNode const* node, aql::Variable const* ref, aql::AstNode const* max = node->getMemberUnchecked(1); TRI_ASSERT(max->type == aql::NODE_TYPE_VALUE); + if (max->type != aql::NODE_TYPE_VALUE) { + THROW_ARANGO_EXCEPTION(TRI_ERROR_QUERY_INVALID_GEO_VALUE); + } if (!max->isValueType(aql::VALUE_TYPE_STRING)) { qp.maxDistance = max->getDoubleValue(); } // else assert(max->getStringValue() == "unlimited") @@ -274,7 +277,6 @@ void Index::handleNode(aql::AstNode const* node, aql::Variable const* ref, if (!qp.origin.is_valid()) { THROW_ARANGO_EXCEPTION(TRI_ERROR_QUERY_INVALID_GEO_VALUE); } - // LOG_TOPIC("a9633", ERR, Logger::FIXME) << "Found center: " << c.toString(); aql::AstNode const* min = node->getMemberUnchecked(1); TRI_ASSERT(min->type == aql::NODE_TYPE_VALUE); @@ -283,7 +285,7 @@ void Index::handleNode(aql::AstNode const* node, aql::Variable const* ref, } default: TRI_ASSERT(false); - break; + THROW_ARANGO_EXCEPTION(TRI_ERROR_QUERY_INVALID_GEO_VALUE); } } From 10af99cd37e957d09c614f4d75d66fd8e7e79906 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20G=C3=B6dderz?= Date: Mon, 14 Dec 2020 10:50:57 +0100 Subject: [PATCH 4/6] Restrict geo index optimizer rule to values --- arangod/Aql/OptimizerRules.cpp | 33 ++++++++------------------------- 1 file changed, 8 insertions(+), 25 deletions(-) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index f8a3010f2a0f..a4aaabf92c92 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -6484,19 +6484,6 @@ void arangodb::aql::inlineSubqueriesRule(Optimizer* opt, std::unique_ptraddPlan(std::move(plan), rule, modified); } -static bool isValueOrAccessibleReference(VarSet const& validVars, AstNode const* node) { - switch (node->type) { - case NODE_TYPE_VALUE: - return true; - case NODE_TYPE_REFERENCE: { - auto const* const variable = static_cast(node->getData()); - return validVars.contains(variable); - } - default: - return false; - } -} - /// Essentially mirrors the geo::QueryParams struct, but with /// abstracts AstNode value objects struct GeoIndexInfo { @@ -6796,21 +6783,20 @@ static bool checkGeoFilterFunction(ExecutionPlan* plan, AstNode const* funcNode, // checks if a node contanis a geo index function a valid operator // to use within a filter condition -bool checkGeoFilterExpression(ExecutionPlan* plan, VarSet const& validVars, - AstNode const* node, GeoIndexInfo& info) { +bool checkGeoFilterExpression(ExecutionPlan* plan, AstNode const* node, GeoIndexInfo& info) { // checks @first `smaller` @second // note: this only modifies "info" if the function returns true auto eval = [&](AstNode const* first, AstNode const* second, bool lessequal) -> bool { - if (isValueOrAccessibleReference(validVars, second) && // no attribute access - info.maxDistanceExpr == nullptr && // max distance is not yet set + if (second->type == NODE_TYPE_VALUE && // only constants allowed + info.maxDistanceExpr == nullptr && // max distance is not yet set checkDistanceFunc(plan, first, /*legacy*/ true, info)) { TRI_ASSERT(info.index); info.maxDistanceExpr = second; info.maxInclusive = info.maxInclusive && lessequal; info.nodesToRemove.insert(node); return true; - } else if (isValueOrAccessibleReference(validVars, first) && // no attribute access - info.minDistanceExpr == nullptr && // min distance is not yet set + } else if (first->type == NODE_TYPE_VALUE && // only constants allowed + info.minDistanceExpr == nullptr && // min distance is not yet set checkDistanceFunc(plan, second, /*legacy*/ true, info)) { info.minDistanceExpr = first; info.minInclusive = info.minInclusive && lessequal; @@ -6889,8 +6875,7 @@ static bool optimizeSortNode(ExecutionPlan* plan, SortNode* sort, GeoIndexInfo& } // checks a single sort or filter node -static void optimizeFilterNode(ExecutionPlan* plan, VarSet const& validVars, - FilterNode* fn, GeoIndexInfo& info) { +static void optimizeFilterNode(ExecutionPlan* plan, FilterNode* fn, GeoIndexInfo& info) { TRI_ASSERT(fn->getType() == EN::FILTER); // filter nodes always have one input variable @@ -6920,7 +6905,7 @@ static void optimizeFilterNode(ExecutionPlan* plan, VarSet const& validVars, if (!node->isSimpleComparisonOperator() && node->type != arangodb::aql::NODE_TYPE_FCALL) { return; } - if (checkGeoFilterExpression(plan, validVars, node, info)) { + if (checkGeoFilterExpression(plan, node, info)) { info.exesToModify.try_emplace(fn, expr); } }); @@ -7121,13 +7106,11 @@ void arangodb::aql::geoIndexRule(Optimizer* opt, std::unique_ptr ExecutionNode* current = node->getFirstParent(); LimitNode* limit = nullptr; bool canUseSortLimit = true; - auto const& validVars = node->getVarsValid(); while (current) { if (current->getType() == EN::FILTER) { // picking up filter conditions is always allowed - optimizeFilterNode(plan.get(), validVars, - ExecutionNode::castTo(current), info); + optimizeFilterNode(plan.get(), ExecutionNode::castTo(current), info); } else if (current->getType() == EN::SORT && canUseSortLimit) { // only pick up a sort clause if we haven't seen another loop yet if (!optimizeSortNode(plan.get(), ExecutionNode::castTo(current), info)) { From daa03a92e3d764ac4033ecbf98cba38f318f32ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20G=C3=B6dderz?= Date: Mon, 14 Dec 2020 13:42:21 +0100 Subject: [PATCH 5/6] Added a test --- tests/js/server/aql/aql-optimizer-geoindex.js | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/js/server/aql/aql-optimizer-geoindex.js b/tests/js/server/aql/aql-optimizer-geoindex.js index 289ab3aece64..0688e2264072 100644 --- a/tests/js/server/aql/aql-optimizer-geoindex.js +++ b/tests/js/server/aql/aql-optimizer-geoindex.js @@ -1088,6 +1088,28 @@ function optimizerRuleTestSuite() { hasFilterNode(result, query); }, + // See testSelfReference1 + // Note that currently, the optimizer rule does not work with `dist2` being + // not a value (but a variable reference) in the filter expression. When + // this changes in the future, the expectations in this test can simply be + // changed. + testInaccessibleReference: function () { + const query = { + string: ` + FOR doc IN @@col + LET dist = GEO_DISTANCE(doc.geo, [16, 53]) + LET dist2 = NOEVAL(7) + FILTER dist < dist2 + RETURN {doc, dist, dist2} + `, + bindVars: {'@col': colName}, + }; + + const result = AQL_EXPLAIN(query.string, query.bindVars); + hasNoIndexNode(result, query); + hasFilterNode(result, query); + }, + }; // test dictionary (return) } // optimizerRuleTestSuite From 74840b42f75a0f43e8875131ce9cf82e0259375f Mon Sep 17 00:00:00 2001 From: jsteemann Date: Mon, 14 Dec 2020 20:38:38 +0100 Subject: [PATCH 6/6] explicit fallthrough, added exception --- arangod/GeoIndex/Index.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arangod/GeoIndex/Index.cpp b/arangod/GeoIndex/Index.cpp index ac5791c305d4..fc2542a4ce5b 100644 --- a/arangod/GeoIndex/Index.cpp +++ b/arangod/GeoIndex/Index.cpp @@ -251,6 +251,7 @@ void Index::handleNode(aql::AstNode const* node, aql::Variable const* ref, case aql::NODE_TYPE_OPERATOR_BINARY_LE: qp.maxInclusive = true; // intentional fallthrough + [[fallthrough]]; case aql::NODE_TYPE_OPERATOR_BINARY_LT: { TRI_ASSERT(node->numMembers() == 2); qp.origin = Index::parseDistFCall(node->getMemberUnchecked(0), ref); @@ -271,6 +272,7 @@ void Index::handleNode(aql::AstNode const* node, aql::Variable const* ref, case aql::NODE_TYPE_OPERATOR_BINARY_GE: qp.minInclusive = true; // intentional fallthrough + [[fallthrough]]; case aql::NODE_TYPE_OPERATOR_BINARY_GT: { TRI_ASSERT(node->numMembers() == 2); qp.origin = Index::parseDistFCall(node->getMemberUnchecked(0), ref); @@ -280,6 +282,9 @@ void Index::handleNode(aql::AstNode const* node, aql::Variable const* ref, aql::AstNode const* min = node->getMemberUnchecked(1); TRI_ASSERT(min->type == aql::NODE_TYPE_VALUE); + if (min->type != aql::NODE_TYPE_VALUE) { + THROW_ARANGO_EXCEPTION(TRI_ERROR_QUERY_INVALID_GEO_VALUE); + } qp.minDistance = min->getDoubleValue(); break; }