8000 Add limits for AQL query complexity by jsteemann · Pull Request #14480 · arangodb/arangodb · GitHub
[go: up one dir, main page]

Skip to content

Add limits for AQL query complexity #14480

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Jul 14, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,6 +1,19 @@
devel
-----

* Add hard-coded complexity limits for AQL queries, in order to prevent
programmatically generated large queries from causing trouble (too deep
recursion, enormous memory usage, long query optimization and distribution
passes etc.).
This change introduces 2 limits:
- a recursion limit for AQL query expressions. An expression can now be
up to 500 levels deep. An example expression is `1 + 2 + 3 + 4`, which
is 3 levels deep `1 + (2 + (3 + 4))`.
The expression recursion is limited to 500 levels.
- a limit for the number of execution nodes in the initial query
execution plan.
The number of execution nodes is limited to 4,000.

* Always remove blocker object for revision trees in case of replication
failures.

Expand Down
17 changes: 16 additions & 1 deletion arangod/Aql/Ast.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2055,8 +2055,9 @@ void Ast::validateAndOptimize(transaction::Methods& trx) {
AqlFunctionsInternalCache aqlFunctionsInternalCache;
transaction::Methods& trx;
int64_t stopOptimizationRequests = 0;
int64_t nestingLevel = 0;
int64_t nestingLevel = 0; // only used for subqueries
int64_t filterDepth = -1; // -1 = not in filter
uint64_t recursionDepth = 0; // current depth of the tree we walk
bool hasSeenAnyWriteNode = false;
bool hasSeenWriteNodeInCurrentScope = false;
};
Expand All @@ -2065,6 +2066,7 @@ void Ast::validateAndOptimize(transaction::Methods& trx) {

auto preVisitor = [&](AstNode const* node) -> bool {
auto ctx = &context;

if (ctx->filterDepth >= 0) {
++ctx->filterDepth;
}
Expand Down 10000 Expand Up @@ -2120,12 +2122,22 @@ void Ast::validateAndOptimize(transaction::Methods& trx) {
// nothing to be done in constant arrays
return false;
}

if (++ctx->recursionDepth > maxExpressionNesting) {
// maximum nesting level for expressions/other constructs reached.
// we abort to prevent a potential stack overflow
THROW_ARANGO_EXCEPTION(TRI_ERROR_QUERY_TOO_MUCH_NESTING);
}

return true;
};

auto postVisitor = [&](AstNode const* node) -> void {
auto ctx = &context;

TRI_ASSERT(ctx->recursionDepth > 0);
--ctx->recursionDepth;

if (ctx->filterDepth >= 0) {
--ctx->filterDepth;
}
Expand Down Expand Up @@ -2345,6 +2357,9 @@ void Ast::validateAndOptimize(transaction::Methods& trx) {

// run the optimizations
_root = traverseAndModify(_root, preVisitor, visitor, postVisitor);

// must be back to 0 after the traversal
TRI_ASSERT(context.recursionDepth == 0);
}

/// @brief determines the variables referenced in an expression
Expand Down
3 changes: 3 additions & 0 deletions arangod/Aql/Ast.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@ class Ast {
/// @brief destroy the AST
~Ast();

/// @brief maximum nesting level for expressions
static constexpr uint64_t maxExpressionNesting = 500;

/// @brief return the query
QueryContext& query() const { return _query; }

Expand Down
7 changes: 7 additions & 0 deletions arangod/Aql/ExecutionPlan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2179,6 +2179,13 @@ ExecutionNode* ExecutionPlan::fromNode(AstNode const* node) {
if (en == nullptr) {
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, "type not handled");
}

if (_ids.size() > maxPlanNodes) {
// maximum number of execution nodes reached
// we have to limit this to prevent super-long runtimes for query
// optimization and execution
THROW_ARANGO_EXCEPTION(TRI_ERROR_QUERY_TOO_MUCH_NESTING);
}
}

return en;
Expand Down
7 changes: 6 additions & 1 deletion arangod/Aql/ExecutionPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,13 @@ class ExecutionPlan {

/// @brief destroy the plan, frees all assigned nodes
~ExecutionPlan();

/// @brief maximum number of execution nodes allowed per query
/// (at the time the initial execution plan is created). we have to limit
/// this to prevent super-long runtimes for query optimization and
/// execution)
static constexpr uint64_t maxPlanNodes = 4000;

public:
/// @brief create an execution plan from an AST
/// note: tracking memory usage requires accessing the Ast/Query objects,
/// which can be inherently unsafe when running within the gtest unit tests.
Expand Down
1 change: 1 addition & 0 deletions js/common/bootstrap/errors.js
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@
"ERROR_QUERY_VARIABLE_NAME_UNKNOWN" : { "code" : 1512, "message" : "unknown variable '%s'" },
"ERROR_QUERY_COLLECTION_LOCK_FAILED" : { "code" : 1521, "message" : "unable to read-lock collection %s" },
"ERROR_QUERY_TOO_MANY_COLLECTIONS" : { "code" : 1522, "message" : "too many collections/shards" },
"ERROR_QUERY_TOO_MUCH_NESTING" : { "code" : 1524, "message" : "too much nesting or too many objects" },
"ERROR_QUERY_INVALID_OPTIONS_ATTRIBUTE" : { "code" : 1539, "message" : "unknown OPTIONS attribute used" },
"ERROR_QUERY_FUNCTION_NAME_UNKNOWN" : { "code" : 1540, "message" : "usage of unknown function '%s()'" },
"ERROR_QUERY_FUNCTION_ARGUMENT_NUMBER_MISMATCH" : { "code" : 1541, "message" : "invalid number of arguments for function '%s()', expected number of arguments: minimum: %d, maximum: %d" },
Expand Down
4 changes: 3 additions & 1 deletion lib/Basics/error-registry.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ struct elsa<ErrorCode> {
#include <cinttypes>

namespace arangodb::error {
constexpr static frozen::unordered_map<ErrorCode, const char*, 345> ErrorMessages = {
constexpr static frozen::unordered_map<ErrorCode, const char*, 346> ErrorMessages = {
{TRI_ERROR_NO_ERROR, // 0
"no error"},
{TRI_ERROR_FAILED, // 1
Expand Down Expand Up @@ -383,6 +383,8 @@ constexpr static frozen::unordered_map<ErrorCode, const char*, 345> ErrorMessage
"unable to read-lock collection %s"},
{TRI_ERROR_QUERY_TOO_MANY_COLLECTIONS, // 1522
"too many collections/shards"},
{TRI_ERROR_QUERY_TOO_MUCH_NESTING, // 1524
"too much nesting or too many objects"},
{TRI_ERROR_QUERY_INVALID_OPTIONS_ATTRIBUTE, // 1539
"unknown OPTIONS attribute used"},
{TRI_ERROR_QUERY_FUNCTION_NAME_UNKNOWN, // 1540
Expand Down
1 change: 1 addition & 0 deletions lib/Basics/errors.dat
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ ERROR_QUERY_VARIABLE_REDECLARED,1511,"variable '%s' is assigned multiple times",
ERROR_QUERY_VARIABLE_NAME_UNKNOWN,1512,"unknown variable '%s'","Will be raised when an unknown variable is used or the variable is undefined the context it is used."
ERROR_QUERY_COLLECTION_LOCK_FAILED,1521,"unable to read-lock collection %s","Will be raised when a read lock on the collection cannot be acquired."
ERROR_QUERY_TOO_MANY_COLLECTIONS,1522,"too many collections/shards","Will be raised when the number of collections or shards in a query is beyond the allowed value."
ERROR_QUERY_TOO_MUCH_NESTING,1524,"too much nesting or too many objects","Will be raised when a query contains expressions or other constructs with too many objects or that are too deeply nested."
ERROR_QUERY_INVALID_OPTIONS_ATTRIBUTE,1539,"unknown OPTIONS attribute used","Will be raised when an unknown attribute is used inside an OPTIONS clause."
ERROR_QUERY_FUNCTION_NAME_UNKNOWN,1540,"usage of unknown function '%s()'","Will be raised when an undefined function is called."
ERROR_QUERY_FUNCTION_ARGUMENT_NUMBER_MISMATCH,1541,"invalid number of arguments for function '%s()', expected number of arguments: minimum: %d, maximum: %d","Will be raised when the number of arguments used in a function call does not match the expected number of arguments for the function."
Expand Down
6 changes: 6 additions & 0 deletions lib/Basics/voc-errors.h
Original file line number Diff line number Diff line change
Expand Up @@ -1005,6 +1005,12 @@ constexpr auto TRI_ERROR_QUERY_COLLECTION_LOCK_FAILED
/// beyond the allowed value.
constexpr auto TRI_ERROR_QUERY_TOO_MANY_COLLECTIONS = ErrorCode{1522};

/// 1524: ERROR_QUERY_TOO_MUCH_NESTING
/// "too much nesting or too many objects"
/// Will be raised when a query contains expressions or other constructs with
/// too many objects or that are too deeply nested.
constexpr auto TRI_ERROR_QUERY_TOO_MUCH_NESTING = ErrorCode{1524};

/// 1539: ERROR_QUERY_INVALID_OPTIONS_ATTRIBUTE
/// "unknown OPTIONS attribute used"
/// Will be raised when an unknown attribute is used inside an OPTIONS clause.
Expand Down
1 change: 1 addition & 0 deletions lib/Rest/GeneralResponse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,7 @@ rest::ResponseCode GeneralResponse::responseCode(ErrorCode code) {
case static_cast<int>(TRI_ERROR_QUERY_FAIL_CALLED):
case static_cast<int>(TRI_ERROR_QUERY_INVALID_DATE_VALUE):
case static_cast<int>(TRI_ERROR_QUERY_MULTI_MODIFY):
case static_cast<int>(TRI_ERROR_QUERY_TOO_MUCH_NESTING):
case static_cast<int>(TRI_ERROR_QUERY_COMPILE_TIME_OPTIONS):
case static_cast<int>(TRI_ERROR_QUERY_INVALID_OPTIONS_ATTRIBUTE):
case static_cast<int>(TRI_ERROR_QUERY_DISALLOWED_DYNAMIC_CALL):
Expand Down
152 changes: 152 additions & 0 deletions tests/Aql/QueryLimitsTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2014-2020 ArangoDB GmbH, Cologne, Germany
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Jan Steemann
////////////////////////////////////////////////////////////////////////////////

#include "ApplicationFeatures/ApplicationServer.h"
#include "Aql/Ast.h"
#include "Aql/ExecutionPlan.h"
#include "Aql/Query.h"
#include "Aql/QueryString.h"
#include "Transaction/StandaloneContext.h"
#include "Utils/ExecContext.h"
#include "VocBase/LogicalCollection.h"
#include "VocBase/VocbaseInfo.h"
#include "VocBase/vocbase.h"

#include <velocypack/Builder.h>
#include <velocypack/Parser.h>
#include <velocypack/Slice.h>

#include "gtest/gtest.h"
#include "Mocks/Servers.h"

namespace {

class AqlQueryLimitsTest
: public ::testing::Test,
public arangodb::tests::LogSuppressor<arangodb::Logger::AUTHENTICATION, arangodb::LogLevel::ERR> {
protected:
arangodb::tests::mocks::MockAqlServer server;

public:
AqlQueryLimitsTest() : server(false) {
server.startFeatures();
}

arangodb::aql::QueryResult executeQuery(TRI_vocbase_t& vocbase, std::string const& queryString,
std::shared_ptr<arangodb::velocypack::Builder> bindVars = nullptr,
std::string const& optionsString = "{}") {
auto ctx = std::make_shared<arangodb::transaction::StandaloneContext>(vocbase);
arangodb::aql::Query query(ctx, arangodb::aql::QueryString(queryString), bindVars,
arangodb::velocypack::Parser::fromJson(optionsString)->slice());

arangodb::aql::QueryResult result;
while (true) {
auto state = query.execute(result);
if (state == arangodb::aql::ExecutionState::WAITING) {
query.sharedState()->waitForAsyncWakeup();
} else {
break;
}
}
return result;
}
};

TEST_F(AqlQueryLimitsTest, testManyNodes) {
arangodb::CreateDatabaseInfo testDBInfo(server.server(), arangodb::ExecContext::current());
testDBInfo.load("testVocbase", 2);
TRI_vocbase_t vocbase(TRI_vocbase_type_e::TRI_VOCBASE_TYPE_NORMAL, std::move(testDBInfo));

std::string query("LET x = NOOPT('testi')\n");
size_t cnt = arangodb::aql::ExecutionPlan::maxPlanNodes - 4; // singleton + calculation + calculation + return
for (size_t i = 1; i <= cnt; ++i) {
query.append("FILTER x\n");
}
query.append("RETURN 1");

auto queryResult = executeQuery(vocbase, query);

ASSERT_TRUE(queryResult.result.ok());
auto slice = queryResult.data->slice();
EXPECT_TRUE(slice.isArray());
EXPECT_EQ(1, slice.length());
EXPECT_EQ(1, slice[0].getNumber<int64_t>());
}

TEST_F(AqlQueryLimitsTest, testTooManyNodes) {
arangodb::CreateDatabaseInfo testDBInfo(server.server(), arangodb::ExecContext::current());
testDBInfo.load("testVocbase", 2);
TRI_vocbase_t vocbase(TRI_vocbase_type_e::TRI_VOCBASE_TYPE_NORMAL, std::move(testDBInfo));

std::string query("LET x = NOOPT('testi')\n");
size_t cnt = arangodb::aql::ExecutionPlan::maxPlanNodes;
for (size_t i = 1; i <= cnt; ++i) {
query.append("FILTER x\n");
}
query.append("RETURN 1");

auto queryResult = executeQuery(vocbase, query);

ASSERT_FALSE(queryResult.result.ok());
ASSERT_EQ(TRI_ERROR_QUERY_TOO_MUCH_NESTING, queryResult.result.errorNumber());
}

TEST_F(AqlQueryLimitsTest, testDeepRecursion) {
arangodb::CreateDatabaseInfo testDBInfo(server.server(), arangodb::ExecContext::current());
testDBInfo.load("testVocbase", 2);
TRI_vocbase_t vocbase(TRI_vocbase_type_e::TRI_VOCBASE_TYPE_NORMAL, std::move(testDBInfo));

std::string query("RETURN 0");
size_t cnt = arangodb::aql::Ast::maxExpressionNesting - 2;
for (size_t i = 1; i <= cnt; ++i) {
query.append(" + ");
query.append(std::to_string(i));
}

auto queryResult = executeQuery(vocbase, query);

ASSERT_TRUE(queryResult.result.ok());
auto slice = queryResult.data->slice();
EXPECT_TRUE(slice.isArray());
EXPECT_EQ(1, slice.length());
EXPECT_EQ(124251, slice[0].getNumber<int64_t>());
}

TEST_F(AqlQueryLimitsTest, testTooDeepRecursion) {
arangodb::CreateDatabaseInfo testDBInfo(server.server(), arangodb::ExecContext::current());
testDBInfo.load("testVocbase", 2);
TRI_vocbase_t vocbase(TRI_vocbase_type_e::TRI_VOCBASE_TYPE_NORMAL, std::move(testDBInfo));

std::string query("RETURN 0");
size_t cnt = arangodb::aql::Ast::maxExpressionNesting;
for (size_t i = 1; i <= cnt; ++i) {
query.append(" + ");
query.append(std::to_string(i));
}

auto queryResult = executeQuery(vocbase, query);
ASSERT_FALSE(queryResult.result.ok());
ASSERT_EQ(TRI_ERROR_QUERY_TOO_MUCH_NESTING, queryResult.result.errorNumber());
}

}
3 changes: 2 additions & 1 deletion tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,9 @@ set(ARANGODB_TESTS_SOURCES
Aql/NgramPosSimilarityFunctionTest.cpp
Aql/NoResultsExecutorTest.cpp
Aql/ProjectionsTest.cpp
Aql/QueryHelper.cpp
Aql/QueryCursorTest.cpp
Aql/QueryHelper.cpp
Aql/QueryLimitsTest.cpp
Aql/RegisterPlanTest.cpp
Aql/RemoteExecutorTest.cpp
Aql/RemoveExecutorTest.cpp
Expand Down
53 changes: 48 additions & 5 deletions tests/js/server/aql/aql-queries-simple.js
Original file line number Diff line number Diff line change
Expand Up @@ -1358,14 +1358,57 @@ function ahuacatlQuerySimpleTestSuite () {
const expected = _.range(cnt, cnt + 10);
assertEqual(expected, AQL_EXECUTE(q, {}, {optimizer: {rules: ['-all']}}).json);
},

testQueryWithManyInitializeCursors : function() {
let q = "LET x = NOOPT([1])\n";
const cnt = 999;
for (let i = 0; i < cnt; ++i) {
q += `FOR s${i} IN x\n `;
}
q += "RETURN 1";
assertEqual([1], AQL_EXECUTE(q, {}, {optimizer: {rules: ['-all']}}).json);
},

testQueryWithManyNodes : function() {
let q = "LET x = NOOPT('testi')\n";
const cnt = 4000 - 4; // singleton + calculation + calculation + return
for (let i = 0; i < cnt; ++i) {
q += `FILTER x\n `;
}
q += "RETURN 1";
assertEqual([1], AQL_EXECUTE(q, {}, {optimizer: {rules: ['-all']}}).json);
},

testQueryWithTooManyNodes : function() {
let q = "LET x = NOOPT('testi')\n";
const cnt = 4000; // plus singleton + calculation + calculation + return
for (let i = 0; i < cnt; ++i) {
q += `FILTER x\n `;
}
q += "RETURN 1";
assertQueryError(errors.ERROR_QUERY_TOO_MUCH_NESTING.code, q);
},

testQueryWithDeepExpression : function() {
let q = "RETURN 0";
const cnt = 500 - 2;
for (let i = 1; i <= cnt; ++i) {
q += " + " + i;
}
assertEqual([124251], AQL_EXECUTE(q, {}, {optimizer: {rules: ['-all']}}).json);
},

testQueryWithTooDeepExpression : function() {
let q = "RETURN 0";
const cnt = 500;
for (let i = 0; i < cnt; ++i) {
q += " + " + i;
}
assertQueryError(errors.ERROR_QUERY_TOO_MUCH_NESTING.code, q);
},
};
}

////////////////////////////////////////////////////////////////////////////////
/// @brief executes the test suite
////////////////////////////////////////////////////////////////////////////////

jsunity.run(ahuacatlQuerySimpleTestSuite);

return jsunity.done();

0